xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4fe895cd02347dacd17b296e50adb20a03bf70f2)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by user is ingored.
207 */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If NumCol = 1 then a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463  PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are either in INSERTMODE or ADDMODE */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        set the roworiented flag to column oriented, and after MatSetValues()
609        restore the original flags */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled, if so we must
639      also disassemble ourselfs, in order that we may reassemble. */
640   /*
641      if nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled thus we can skip this stuff
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
701       PetscFunctionReturn(0);
702     }
703   }
704 
705   if (isdraw) {
706     PetscDraw       draw;
707     PetscTruth isnull;
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
710   }
711 
712   if (size == 1) {
713     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
714     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     Mat_SeqSBAIJ *Aloc;
719     Mat_SeqBAIJ *Bloc;
720     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
721     MatScalar   *a;
722 
723     /* Should this be the same type as mat? */
724     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
727     } else {
728       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
729     }
730     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
731     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
732     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
733 
734     /* copy over the A part */
735     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
736     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
737     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
738 
739     for (i=0; i<mbs; i++) {
740       rvals[0] = bs*(baij->rstart + i);
741       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742       for (j=ai[i]; j<ai[i+1]; j++) {
743         col = (baij->cstart+aj[j])*bs;
744         for (k=0; k<bs; k++) {
745           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
746           col++; a += bs;
747         }
748       }
749     }
750     /* copy over the B part */
751     Bloc = (Mat_SeqBAIJ*)baij->B->data;
752     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
753     for (i=0; i<mbs; i++) {
754       rvals[0] = bs*(baij->rstart + i);
755       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
756       for (j=ai[i]; j<ai[i+1]; j++) {
757         col = baij->garray[aj[j]]*bs;
758         for (k=0; k<bs; k++) {
759           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
760           col++; a += bs;
761         }
762       }
763     }
764     ierr = PetscFree(rvals);CHKERRQ(ierr);
765     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
766     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     /*
768        Everyone has to call to draw the matrix since the graphics waits are
769        synchronized across all processors that share the PetscDraw object
770     */
771     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
772     if (!rank) {
773       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
774       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
775     }
776     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
777     ierr = MatDestroy(A);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ"
784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
785 {
786   PetscErrorCode ierr;
787   PetscTruth     iascii,isdraw,issocket,isbinary;
788 
789   PetscFunctionBegin;
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
794   if (iascii || isdraw || issocket || isbinary) {
795     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
796   } else {
797     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatDestroy_MPISBAIJ"
804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
805 {
806   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810 #if defined(PETSC_USE_LOG)
811   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
812 #endif
813   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
814   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
815   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
816   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
817   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
818 #if defined (PETSC_USE_CTABLE)
819   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
820 #else
821   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
822 #endif
823   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
824   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
825   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
826   if (baij->slvec0) {
827     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
828     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
829   }
830   if (baij->slvec1) {
831     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
832     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
834   }
835   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
836   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
837   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
838   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
839 #if defined(PETSC_USE_MAT_SINGLE)
840   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
841 #endif
842   ierr = PetscFree(baij);CHKERRQ(ierr);
843 
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ"
853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt,mbs=a->mbs,bs=A->bs;
858   PetscScalar    *x,*from,zero=0.0;
859 
860   PetscFunctionBegin;
861   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
862   if (nt != A->n) {
863     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
864   }
865   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
866   if (nt != A->m) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
876 
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
880   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
881   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
882 
883   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
886 
887   /* supperdiagonal part */
888   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->n) {
904     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
905   }
906   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
907   if (nt != A->m) {
908     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
909   }
910 
911   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
912   /* do diagonal part */
913   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
914   /* do supperdiagonal part */
915   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931   PetscInt       mbs=a->mbs,bs=A->bs;
932   PetscScalar    *x,*from,zero=0.0;
933 
934   PetscFunctionBegin;
935   /*
936   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
937   PetscSynchronizedFlush(A->comm);
938   */
939   /* diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* supperdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
971   /* do diagonal part */
972   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
973   /* do supperdiagonal part */
974   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976 
977   /* do subdiagonal part */
978   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
979   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 /*
986   This only works correctly for square matrices where the subblock A->A is the
987    diagonal block
988 */
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
998   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatScale_MPISBAIJ"
1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1005 {
1006   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1011   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1018 {
1019   PetscFunctionBegin;
1020   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1029 
1030   PetscFunctionBegin;
1031   if (!baij->getrowactive) {
1032     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1033   }
1034   baij->getrowactive = PETSC_FALSE;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1041 {
1042   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1047   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1060   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1067 {
1068   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1073   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1082   Mat            A = a->A,B = a->B;
1083   PetscErrorCode ierr;
1084   PetscReal      isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = (PetscReal)matin->bs;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else {
1115     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1116   }
1117   info->rows_global       = (PetscReal)A->M;
1118   info->columns_global    = (PetscReal)A->N;
1119   info->rows_local        = (PetscReal)A->m;
1120   info->columns_local     = (PetscReal)A->N;
1121   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1122   info->fill_ratio_needed = 0;
1123   info->factor_mallocs    = 0;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1130 {
1131   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   switch (op) {
1136   case MAT_NO_NEW_NONZERO_LOCATIONS:
1137   case MAT_YES_NEW_NONZERO_LOCATIONS:
1138   case MAT_COLUMNS_UNSORTED:
1139   case MAT_COLUMNS_SORTED:
1140   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1141   case MAT_KEEP_ZEROED_ROWS:
1142   case MAT_NEW_NONZERO_LOCATION_ERR:
1143     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1144     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1145     break;
1146   case MAT_ROW_ORIENTED:
1147     a->roworiented = PETSC_TRUE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_ROWS_SORTED:
1152   case MAT_ROWS_UNSORTED:
1153   case MAT_YES_NEW_DIAGONALS:
1154     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1155     break;
1156   case MAT_COLUMN_ORIENTED:
1157     a->roworiented = PETSC_FALSE;
1158     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1159     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1160     break;
1161   case MAT_IGNORE_OFF_PROC_ENTRIES:
1162     a->donotstash = PETSC_TRUE;
1163     break;
1164   case MAT_NO_NEW_DIAGONALS:
1165     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1166   case MAT_USE_HASH_TABLE:
1167     a->ht_flag = PETSC_TRUE;
1168     break;
1169   case MAT_NOT_SYMMETRIC:
1170   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1171   case MAT_HERMITIAN:
1172     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1173   case MAT_SYMMETRIC:
1174   case MAT_STRUCTURALLY_SYMMETRIC:
1175   case MAT_NOT_HERMITIAN:
1176   case MAT_SYMMETRY_ETERNAL:
1177   case MAT_NOT_SYMMETRY_ETERNAL:
1178     break;
1179   default:
1180     SETERRQ(PETSC_ERR_SUP,"unknown option");
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1188 {
1189   PetscErrorCode ierr;
1190   PetscFunctionBegin;
1191   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1198 {
1199   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1200   Mat            a=baij->A, b=baij->B;
1201   PetscErrorCode ierr;
1202   PetscInt       nv,m,n;
1203   PetscTruth     flg;
1204 
1205   PetscFunctionBegin;
1206   if (ll != rr){
1207     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1208     if (!flg)
1209       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1210   }
1211   if (!ll) PetscFunctionReturn(0);
1212 
1213   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1214   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1215 
1216   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1217   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1218 
1219   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1220 
1221   /* left diagonalscale the off-diagonal part */
1222   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1223 
1224   /* scale the diagonal part */
1225   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1226 
1227   /* right diagonalscale the off-diagonal part */
1228   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1229   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1236 {
1237   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1238   MPI_Comm          comm = A->comm;
1239   static PetscTruth called = PETSC_FALSE;
1240   PetscErrorCode    ierr;
1241 
1242   PetscFunctionBegin;
1243   if (!a->rank) {
1244     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1245   }
1246   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1247   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1248   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1255 {
1256   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatEqual_MPISBAIJ"
1268 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1269 {
1270   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1271   Mat            a,b,c,d;
1272   PetscTruth     flg;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   a = matA->A; b = matA->B;
1277   c = matB->A; d = matB->B;
1278 
1279   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1280   if (flg) {
1281     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1282   }
1283   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1289 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1290 {
1291   PetscErrorCode ierr;
1292 
1293   PetscFunctionBegin;
1294   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #include "petscblaslapack.h"
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1301 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1302 {
1303   PetscErrorCode ierr;
1304   PetscInt       i;
1305   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1306   PetscBLASInt   bnz,one=1;
1307   Mat_SeqSBAIJ   *xa,*ya;
1308   Mat_SeqBAIJ    *xb,*yb;
1309 
1310   PetscFunctionBegin;
1311   if (str == SAME_NONZERO_PATTERN) {
1312     PetscScalar alpha = a;
1313     xa = (Mat_SeqSBAIJ *)xx->A->data;
1314     ya = (Mat_SeqSBAIJ *)yy->A->data;
1315     bnz = (PetscBLASInt)xa->nz;
1316     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1317     xb = (Mat_SeqBAIJ *)xx->B->data;
1318     yb = (Mat_SeqBAIJ *)yy->B->data;
1319     bnz = (PetscBLASInt)xb->nz;
1320     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1321   } else {
1322     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1323   }
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1329 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1330 {
1331   PetscErrorCode ierr;
1332   PetscInt       i;
1333   PetscTruth     flg;
1334 
1335   PetscFunctionBegin;
1336   for (i=0; i<n; i++) {
1337     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1338     if (!flg) {
1339       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1340     }
1341   }
1342   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 
1347 /* -------------------------------------------------------------------*/
1348 static struct _MatOps MatOps_Values = {
1349        MatSetValues_MPISBAIJ,
1350        MatGetRow_MPISBAIJ,
1351        MatRestoreRow_MPISBAIJ,
1352        MatMult_MPISBAIJ,
1353 /* 4*/ MatMultAdd_MPISBAIJ,
1354        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1355        MatMultAdd_MPISBAIJ,
1356        0,
1357        0,
1358        0,
1359 /*10*/ 0,
1360        0,
1361        0,
1362        MatRelax_MPISBAIJ,
1363        MatTranspose_MPISBAIJ,
1364 /*15*/ MatGetInfo_MPISBAIJ,
1365        MatEqual_MPISBAIJ,
1366        MatGetDiagonal_MPISBAIJ,
1367        MatDiagonalScale_MPISBAIJ,
1368        MatNorm_MPISBAIJ,
1369 /*20*/ MatAssemblyBegin_MPISBAIJ,
1370        MatAssemblyEnd_MPISBAIJ,
1371        0,
1372        MatSetOption_MPISBAIJ,
1373        MatZeroEntries_MPISBAIJ,
1374 /*25*/ 0,
1375        0,
1376        0,
1377        0,
1378        0,
1379 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1380        0,
1381        0,
1382        0,
1383        0,
1384 /*35*/ MatDuplicate_MPISBAIJ,
1385        0,
1386        0,
1387        0,
1388        0,
1389 /*40*/ MatAXPY_MPISBAIJ,
1390        MatGetSubMatrices_MPISBAIJ,
1391        MatIncreaseOverlap_MPISBAIJ,
1392        MatGetValues_MPISBAIJ,
1393        0,
1394 /*45*/ MatPrintHelp_MPISBAIJ,
1395        MatScale_MPISBAIJ,
1396        0,
1397        0,
1398        0,
1399 /*50*/ 0,
1400        0,
1401        0,
1402        0,
1403        0,
1404 /*55*/ 0,
1405        0,
1406        MatSetUnfactored_MPISBAIJ,
1407        0,
1408        MatSetValuesBlocked_MPISBAIJ,
1409 /*60*/ 0,
1410        0,
1411        0,
1412        MatGetPetscMaps_Petsc,
1413        0,
1414 /*65*/ 0,
1415        0,
1416        0,
1417        0,
1418        0,
1419 /*70*/ MatGetRowMax_MPISBAIJ,
1420        0,
1421        0,
1422        0,
1423        0,
1424 /*75*/ 0,
1425        0,
1426        0,
1427        0,
1428        0,
1429 /*80*/ 0,
1430        0,
1431        0,
1432        0,
1433        MatLoad_MPISBAIJ,
1434 /*85*/ 0,
1435        0,
1436        0,
1437        0,
1438        0,
1439 /*90*/ 0,
1440        0,
1441        0,
1442        0,
1443        0,
1444 /*95*/ 0,
1445        0,
1446        0,
1447        0,
1448        0,
1449 /*100*/0,
1450        0,
1451        0,
1452        0,
1453        0,
1454 /*105*/0,
1455        MatRealPart_MPISBAIJ,
1456        MatImaginaryPart_MPISBAIJ
1457 };
1458 
1459 
1460 EXTERN_C_BEGIN
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1463 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1464 {
1465   PetscFunctionBegin;
1466   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1467   *iscopy = PETSC_FALSE;
1468   PetscFunctionReturn(0);
1469 }
1470 EXTERN_C_END
1471 
1472 EXTERN_C_BEGIN
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1475 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1476 {
1477   Mat_MPISBAIJ   *b;
1478   PetscErrorCode ierr;
1479   PetscInt       i,mbs,Mbs;
1480 
1481   PetscFunctionBegin;
1482   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1483 
1484   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1485   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1486   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1487   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1488   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1489   if (d_nnz) {
1490     for (i=0; i<B->m/bs; i++) {
1491       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1492     }
1493   }
1494   if (o_nnz) {
1495     for (i=0; i<B->m/bs; i++) {
1496       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1497     }
1498   }
1499   B->preallocated = PETSC_TRUE;
1500   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1501   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1502   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1503   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1504 
1505   b   = (Mat_MPISBAIJ*)B->data;
1506   mbs = B->m/bs;
1507   Mbs = B->M/bs;
1508   if (mbs*bs != B->m) {
1509     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1510   }
1511 
1512   B->bs  = bs;
1513   b->bs2 = bs*bs;
1514   b->mbs = mbs;
1515   b->nbs = mbs;
1516   b->Mbs = Mbs;
1517   b->Nbs = Mbs;
1518 
1519   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1520   b->rowners[0]    = 0;
1521   for (i=2; i<=b->size; i++) {
1522     b->rowners[i] += b->rowners[i-1];
1523   }
1524   b->rstart    = b->rowners[b->rank];
1525   b->rend      = b->rowners[b->rank+1];
1526   b->cstart    = b->rstart;
1527   b->cend      = b->rend;
1528   for (i=0; i<=b->size; i++) {
1529     b->rowners_bs[i] = b->rowners[i]*bs;
1530   }
1531   b->rstart_bs = b-> rstart*bs;
1532   b->rend_bs   = b->rend*bs;
1533 
1534   b->cstart_bs = b->cstart*bs;
1535   b->cend_bs   = b->cend*bs;
1536 
1537   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1538   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1539   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1540   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1541   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1542 
1543   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1544   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1545   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1546   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1547   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1548 
1549   /* build cache for off array entries formed */
1550   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1551 
1552   PetscFunctionReturn(0);
1553 }
1554 EXTERN_C_END
1555 
1556 /*MC
1557    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1558    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1559 
1560    Options Database Keys:
1561 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1562 
1563   Level: beginner
1564 
1565 .seealso: MatCreateMPISBAIJ
1566 M*/
1567 
1568 EXTERN_C_BEGIN
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatCreate_MPISBAIJ"
1571 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1572 {
1573   Mat_MPISBAIJ   *b;
1574   PetscErrorCode ierr;
1575   PetscTruth     flg;
1576 
1577   PetscFunctionBegin;
1578 
1579   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1580   B->data = (void*)b;
1581   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1582 
1583   B->ops->destroy    = MatDestroy_MPISBAIJ;
1584   B->ops->view       = MatView_MPISBAIJ;
1585   B->mapping    = 0;
1586   B->factor     = 0;
1587   B->assembled  = PETSC_FALSE;
1588 
1589   B->insertmode = NOT_SET_VALUES;
1590   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1591   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1592 
1593   /* build local table of row and column ownerships */
1594   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1595   b->cowners    = b->rowners + b->size + 2;
1596   b->rowners_bs = b->cowners + b->size + 2;
1597   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1598 
1599   /* build cache for off array entries formed */
1600   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1601   b->donotstash  = PETSC_FALSE;
1602   b->colmap      = PETSC_NULL;
1603   b->garray      = PETSC_NULL;
1604   b->roworiented = PETSC_TRUE;
1605 
1606 #if defined(PETSC_USE_MAT_SINGLE)
1607   /* stuff for MatSetValues_XXX in single precision */
1608   b->setvalueslen     = 0;
1609   b->setvaluescopy    = PETSC_NULL;
1610 #endif
1611 
1612   /* stuff used in block assembly */
1613   b->barray       = 0;
1614 
1615   /* stuff used for matrix vector multiply */
1616   b->lvec         = 0;
1617   b->Mvctx        = 0;
1618   b->slvec0       = 0;
1619   b->slvec0b      = 0;
1620   b->slvec1       = 0;
1621   b->slvec1a      = 0;
1622   b->slvec1b      = 0;
1623   b->sMvctx       = 0;
1624 
1625   /* stuff for MatGetRow() */
1626   b->rowindices   = 0;
1627   b->rowvalues    = 0;
1628   b->getrowactive = PETSC_FALSE;
1629 
1630   /* hash table stuff */
1631   b->ht           = 0;
1632   b->hd           = 0;
1633   b->ht_size      = 0;
1634   b->ht_flag      = PETSC_FALSE;
1635   b->ht_fact      = 0;
1636   b->ht_total_ct  = 0;
1637   b->ht_insert_ct = 0;
1638 
1639   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1640   if (flg) {
1641     PetscReal fact = 1.39;
1642     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1643     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1644     if (fact <= 1.0) fact = 1.39;
1645     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1646     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1647   }
1648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1649                                      "MatStoreValues_MPISBAIJ",
1650                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1652                                      "MatRetrieveValues_MPISBAIJ",
1653                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1655                                      "MatGetDiagonalBlock_MPISBAIJ",
1656                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1658                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1659                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1660   B->symmetric                  = PETSC_TRUE;
1661   B->structurally_symmetric     = PETSC_TRUE;
1662   B->symmetric_set              = PETSC_TRUE;
1663   B->structurally_symmetric_set = PETSC_TRUE;
1664   PetscFunctionReturn(0);
1665 }
1666 EXTERN_C_END
1667 
1668 /*MC
1669    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1670 
1671    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1672    and MATMPISBAIJ otherwise.
1673 
1674    Options Database Keys:
1675 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1676 
1677   Level: beginner
1678 
1679 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1680 M*/
1681 
1682 EXTERN_C_BEGIN
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatCreate_SBAIJ"
1685 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1686 {
1687   PetscErrorCode ierr;
1688   PetscMPIInt    size;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1692   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1693   if (size == 1) {
1694     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1695   } else {
1696     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 EXTERN_C_END
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1704 /*@C
1705    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1706    the user should preallocate the matrix storage by setting the parameters
1707    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1708    performance can be increased by more than a factor of 50.
1709 
1710    Collective on Mat
1711 
1712    Input Parameters:
1713 +  A - the matrix
1714 .  bs   - size of blockk
1715 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1716            submatrix  (same for all local rows)
1717 .  d_nnz - array containing the number of block nonzeros in the various block rows
1718            in the upper triangular and diagonal part of the in diagonal portion of the local
1719            (possibly different for each block row) or PETSC_NULL.  You must leave room
1720            for the diagonal entry even if it is zero.
1721 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1722            submatrix (same for all local rows).
1723 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1724            off-diagonal portion of the local submatrix (possibly different for
1725            each block row) or PETSC_NULL.
1726 
1727 
1728    Options Database Keys:
1729 .   -mat_no_unroll - uses code that does not unroll the loops in the
1730                      block calculations (much slower)
1731 .   -mat_block_size - size of the blocks to use
1732 
1733    Notes:
1734 
1735    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1736    than it must be used on all processors that share the object for that argument.
1737 
1738    If the *_nnz parameter is given then the *_nz parameter is ignored
1739 
1740    Storage Information:
1741    For a square global matrix we define each processor's diagonal portion
1742    to be its local rows and the corresponding columns (a square submatrix);
1743    each processor's off-diagonal portion encompasses the remainder of the
1744    local matrix (a rectangular submatrix).
1745 
1746    The user can specify preallocated storage for the diagonal part of
1747    the local submatrix with either d_nz or d_nnz (not both).  Set
1748    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1749    memory allocation.  Likewise, specify preallocated storage for the
1750    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1751 
1752    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1753    the figure below we depict these three local rows and all columns (0-11).
1754 
1755 .vb
1756            0 1 2 3 4 5 6 7 8 9 10 11
1757           -------------------
1758    row 3  |  o o o d d d o o o o o o
1759    row 4  |  o o o d d d o o o o o o
1760    row 5  |  o o o d d d o o o o o o
1761           -------------------
1762 .ve
1763 
1764    Thus, any entries in the d locations are stored in the d (diagonal)
1765    submatrix, and any entries in the o locations are stored in the
1766    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1767    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1768 
1769    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1770    plus the diagonal part of the d matrix,
1771    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1772    In general, for PDE problems in which most nonzeros are near the diagonal,
1773    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1774    or you will get TERRIBLE performance; see the users' manual chapter on
1775    matrices.
1776 
1777    Level: intermediate
1778 
1779 .keywords: matrix, block, aij, compressed row, sparse, parallel
1780 
1781 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1782 @*/
1783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1784 {
1785   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1786 
1787   PetscFunctionBegin;
1788   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1789   if (f) {
1790     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1791   }
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatCreateMPISBAIJ"
1797 /*@C
1798    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1799    (block compressed row).  For good matrix assembly performance
1800    the user should preallocate the matrix storage by setting the parameters
1801    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1802    performance can be increased by more than a factor of 50.
1803 
1804    Collective on MPI_Comm
1805 
1806    Input Parameters:
1807 +  comm - MPI communicator
1808 .  bs   - size of blockk
1809 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1810            This value should be the same as the local size used in creating the
1811            y vector for the matrix-vector product y = Ax.
1812 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1813            This value should be the same as the local size used in creating the
1814            x vector for the matrix-vector product y = Ax.
1815 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1816 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1817 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1818            submatrix  (same for all local rows)
1819 .  d_nnz - array containing the number of block nonzeros in the various block rows
1820            in the upper triangular portion of the in diagonal portion of the local
1821            (possibly different for each block block row) or PETSC_NULL.
1822            You must leave room for the diagonal entry even if it is zero.
1823 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1824            submatrix (same for all local rows).
1825 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1826            off-diagonal portion of the local submatrix (possibly different for
1827            each block row) or PETSC_NULL.
1828 
1829    Output Parameter:
1830 .  A - the matrix
1831 
1832    Options Database Keys:
1833 .   -mat_no_unroll - uses code that does not unroll the loops in the
1834                      block calculations (much slower)
1835 .   -mat_block_size - size of the blocks to use
1836 .   -mat_mpi - use the parallel matrix data structures even on one processor
1837                (defaults to using SeqBAIJ format on one processor)
1838 
1839    Notes:
1840    The number of rows and columns must be divisible by blocksize.
1841 
1842    The user MUST specify either the local or global matrix dimensions
1843    (possibly both).
1844 
1845    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1846    than it must be used on all processors that share the object for that argument.
1847 
1848    If the *_nnz parameter is given then the *_nz parameter is ignored
1849 
1850    Storage Information:
1851    For a square global matrix we define each processor's diagonal portion
1852    to be its local rows and the corresponding columns (a square submatrix);
1853    each processor's off-diagonal portion encompasses the remainder of the
1854    local matrix (a rectangular submatrix).
1855 
1856    The user can specify preallocated storage for the diagonal part of
1857    the local submatrix with either d_nz or d_nnz (not both).  Set
1858    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1859    memory allocation.  Likewise, specify preallocated storage for the
1860    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1861 
1862    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1863    the figure below we depict these three local rows and all columns (0-11).
1864 
1865 .vb
1866            0 1 2 3 4 5 6 7 8 9 10 11
1867           -------------------
1868    row 3  |  o o o d d d o o o o o o
1869    row 4  |  o o o d d d o o o o o o
1870    row 5  |  o o o d d d o o o o o o
1871           -------------------
1872 .ve
1873 
1874    Thus, any entries in the d locations are stored in the d (diagonal)
1875    submatrix, and any entries in the o locations are stored in the
1876    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1877    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1878 
1879    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1880    plus the diagonal part of the d matrix,
1881    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1882    In general, for PDE problems in which most nonzeros are near the diagonal,
1883    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1884    or you will get TERRIBLE performance; see the users' manual chapter on
1885    matrices.
1886 
1887    Level: intermediate
1888 
1889 .keywords: matrix, block, aij, compressed row, sparse, parallel
1890 
1891 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1892 @*/
1893 
1894 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1895 {
1896   PetscErrorCode ierr;
1897   PetscMPIInt    size;
1898 
1899   PetscFunctionBegin;
1900   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1901   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1902   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1903   if (size > 1) {
1904     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1905     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1906   } else {
1907     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1908     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1909   }
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1916 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1917 {
1918   Mat            mat;
1919   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1920   PetscErrorCode ierr;
1921   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1922   PetscScalar    *array;
1923 
1924   PetscFunctionBegin;
1925   *newmat       = 0;
1926   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1927   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1928   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1929   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1930 
1931   mat->factor       = matin->factor;
1932   mat->preallocated = PETSC_TRUE;
1933   mat->assembled    = PETSC_TRUE;
1934   mat->insertmode   = NOT_SET_VALUES;
1935 
1936   a = (Mat_MPISBAIJ*)mat->data;
1937   mat->bs  = matin->bs;
1938   a->bs2   = oldmat->bs2;
1939   a->mbs   = oldmat->mbs;
1940   a->nbs   = oldmat->nbs;
1941   a->Mbs   = oldmat->Mbs;
1942   a->Nbs   = oldmat->Nbs;
1943 
1944   a->rstart       = oldmat->rstart;
1945   a->rend         = oldmat->rend;
1946   a->cstart       = oldmat->cstart;
1947   a->cend         = oldmat->cend;
1948   a->size         = oldmat->size;
1949   a->rank         = oldmat->rank;
1950   a->donotstash   = oldmat->donotstash;
1951   a->roworiented  = oldmat->roworiented;
1952   a->rowindices   = 0;
1953   a->rowvalues    = 0;
1954   a->getrowactive = PETSC_FALSE;
1955   a->barray       = 0;
1956   a->rstart_bs    = oldmat->rstart_bs;
1957   a->rend_bs      = oldmat->rend_bs;
1958   a->cstart_bs    = oldmat->cstart_bs;
1959   a->cend_bs      = oldmat->cend_bs;
1960 
1961   /* hash table stuff */
1962   a->ht           = 0;
1963   a->hd           = 0;
1964   a->ht_size      = 0;
1965   a->ht_flag      = oldmat->ht_flag;
1966   a->ht_fact      = oldmat->ht_fact;
1967   a->ht_total_ct  = 0;
1968   a->ht_insert_ct = 0;
1969 
1970   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1971   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1972   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1973   if (oldmat->colmap) {
1974 #if defined (PETSC_USE_CTABLE)
1975     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1976 #else
1977     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1978     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1979     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1980 #endif
1981   } else a->colmap = 0;
1982 
1983   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1984     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1985     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1986     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1987   } else a->garray = 0;
1988 
1989   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1990   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1991   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1992   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1993 
1994   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1995   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1996   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1997   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1998 
1999   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2000   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2001   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2002   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2003   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2004   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2005   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2006   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2007   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2008   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2009   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2010   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2011   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2012 
2013   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2014   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2015   a->sMvctx = oldmat->sMvctx;
2016   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2017 
2018   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2019   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2020   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2021   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2022   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2023   *newmat = mat;
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #include "petscsys.h"
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatLoad_MPISBAIJ"
2031 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2032 {
2033   Mat            A;
2034   PetscErrorCode ierr;
2035   PetscInt       i,nz,j,rstart,rend;
2036   PetscScalar    *vals,*buf;
2037   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2038   MPI_Status     status;
2039   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2040   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2041   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2042   PetscInt       bs=1,Mbs,mbs,extra_rows;
2043   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2044   PetscInt       dcount,kmax,k,nzcount,tmp;
2045   int            fd;
2046 
2047   PetscFunctionBegin;
2048   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2049 
2050   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2052   if (!rank) {
2053     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2054     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2055     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2056     if (header[3] < 0) {
2057       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2058     }
2059   }
2060 
2061   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2062   M = header[1]; N = header[2];
2063 
2064   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2065 
2066   /*
2067      This code adds extra rows to make sure the number of rows is
2068      divisible by the blocksize
2069   */
2070   Mbs        = M/bs;
2071   extra_rows = bs - M + bs*(Mbs);
2072   if (extra_rows == bs) extra_rows = 0;
2073   else                  Mbs++;
2074   if (extra_rows &&!rank) {
2075     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2076   }
2077 
2078   /* determine ownership of all rows */
2079   mbs        = Mbs/size + ((Mbs % size) > rank);
2080   m          = mbs*bs;
2081   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2082   browners   = rowners + size + 1;
2083   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2084   rowners[0] = 0;
2085   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2086   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2087   rstart = rowners[rank];
2088   rend   = rowners[rank+1];
2089 
2090   /* distribute row lengths to all processors */
2091   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2092   if (!rank) {
2093     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2094     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2095     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2096     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2097     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2098     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2099     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2100   } else {
2101     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2102   }
2103 
2104   if (!rank) {   /* procs[0] */
2105     /* calculate the number of nonzeros on each processor */
2106     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2107     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<size; i++) {
2109       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2110         procsnz[i] += rowlengths[j];
2111       }
2112     }
2113     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2114 
2115     /* determine max buffer needed and allocate it */
2116     maxnz = 0;
2117     for (i=0; i<size; i++) {
2118       maxnz = PetscMax(maxnz,procsnz[i]);
2119     }
2120     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2121 
2122     /* read in my part of the matrix column indices  */
2123     nz     = procsnz[0];
2124     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2125     mycols = ibuf;
2126     if (size == 1)  nz -= extra_rows;
2127     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2128     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2129 
2130     /* read in every ones (except the last) and ship off */
2131     for (i=1; i<size-1; i++) {
2132       nz   = procsnz[i];
2133       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2134       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2135     }
2136     /* read in the stuff for the last proc */
2137     if (size != 1) {
2138       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2139       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2140       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2141       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2142     }
2143     ierr = PetscFree(cols);CHKERRQ(ierr);
2144   } else {  /* procs[i], i>0 */
2145     /* determine buffer space needed for message */
2146     nz = 0;
2147     for (i=0; i<m; i++) {
2148       nz += locrowlens[i];
2149     }
2150     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2151     mycols = ibuf;
2152     /* receive message of column indices*/
2153     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2154     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2155     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2156   }
2157 
2158   /* loop over local rows, determining number of off diagonal entries */
2159   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2160   odlens   = dlens + (rend-rstart);
2161   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2162   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2163   masked1  = mask    + Mbs;
2164   masked2  = masked1 + Mbs;
2165   rowcount = 0; nzcount = 0;
2166   for (i=0; i<mbs; i++) {
2167     dcount  = 0;
2168     odcount = 0;
2169     for (j=0; j<bs; j++) {
2170       kmax = locrowlens[rowcount];
2171       for (k=0; k<kmax; k++) {
2172         tmp = mycols[nzcount++]/bs; /* block col. index */
2173         if (!mask[tmp]) {
2174           mask[tmp] = 1;
2175           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2176           else masked1[dcount++] = tmp; /* entry in diag portion */
2177         }
2178       }
2179       rowcount++;
2180     }
2181 
2182     dlens[i]  = dcount;  /* d_nzz[i] */
2183     odlens[i] = odcount; /* o_nzz[i] */
2184 
2185     /* zero out the mask elements we set */
2186     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2187     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2188   }
2189 
2190   /* create our matrix */
2191   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2192   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2193   ierr = MatSetType(A,type);CHKERRQ(ierr);
2194   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2195   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2196 
2197   if (!rank) {
2198     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2199     /* read in my part of the matrix numerical values  */
2200     nz = procsnz[0];
2201     vals = buf;
2202     mycols = ibuf;
2203     if (size == 1)  nz -= extra_rows;
2204     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2205     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2206 
2207     /* insert into matrix */
2208     jj      = rstart*bs;
2209     for (i=0; i<m; i++) {
2210       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2211       mycols += locrowlens[i];
2212       vals   += locrowlens[i];
2213       jj++;
2214     }
2215 
2216     /* read in other processors (except the last one) and ship out */
2217     for (i=1; i<size-1; i++) {
2218       nz   = procsnz[i];
2219       vals = buf;
2220       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2221       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2222     }
2223     /* the last proc */
2224     if (size != 1){
2225       nz   = procsnz[i] - extra_rows;
2226       vals = buf;
2227       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2228       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2229       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2230     }
2231     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2232 
2233   } else {
2234     /* receive numeric values */
2235     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2236 
2237     /* receive message of values*/
2238     vals   = buf;
2239     mycols = ibuf;
2240     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2241     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2242     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2243 
2244     /* insert into matrix */
2245     jj      = rstart*bs;
2246     for (i=0; i<m; i++) {
2247       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2248       mycols += locrowlens[i];
2249       vals   += locrowlens[i];
2250       jj++;
2251     }
2252   }
2253 
2254   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2255   ierr = PetscFree(buf);CHKERRQ(ierr);
2256   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2257   ierr = PetscFree(rowners);CHKERRQ(ierr);
2258   ierr = PetscFree(dlens);CHKERRQ(ierr);
2259   ierr = PetscFree(mask);CHKERRQ(ierr);
2260   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2261   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2262   *newmat = A;
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2268 /*XXXXX@
2269    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2270 
2271    Input Parameters:
2272 .  mat  - the matrix
2273 .  fact - factor
2274 
2275    Collective on Mat
2276 
2277    Level: advanced
2278 
2279   Notes:
2280    This can also be set by the command line option: -mat_use_hash_table fact
2281 
2282 .keywords: matrix, hashtable, factor, HT
2283 
2284 .seealso: MatSetOption()
2285 @XXXXX*/
2286 
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2290 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2291 {
2292   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2293   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2294   PetscReal      atmp;
2295   PetscReal      *work,*svalues,*rvalues;
2296   PetscErrorCode ierr;
2297   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2298   PetscMPIInt    rank,size;
2299   PetscInt       *rowners_bs,dest,count,source;
2300   PetscScalar    *va;
2301   MatScalar      *ba;
2302   MPI_Status     stat;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2306   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2307 
2308   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2309   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2310 
2311   bs   = A->bs;
2312   mbs  = a->mbs;
2313   Mbs  = a->Mbs;
2314   ba   = b->a;
2315   bi   = b->i;
2316   bj   = b->j;
2317 
2318   /* find ownerships */
2319   rowners_bs = a->rowners_bs;
2320 
2321   /* each proc creates an array to be distributed */
2322   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2323   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2324 
2325   /* row_max for B */
2326   if (rank != size-1){
2327     for (i=0; i<mbs; i++) {
2328       ncols = bi[1] - bi[0]; bi++;
2329       brow  = bs*i;
2330       for (j=0; j<ncols; j++){
2331         bcol = bs*(*bj);
2332         for (kcol=0; kcol<bs; kcol++){
2333           col = bcol + kcol;                 /* local col index */
2334           col += rowners_bs[rank+1];      /* global col index */
2335           for (krow=0; krow<bs; krow++){
2336             atmp = PetscAbsScalar(*ba); ba++;
2337             row = brow + krow;    /* local row index */
2338             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2339             if (work[col] < atmp) work[col] = atmp;
2340           }
2341         }
2342         bj++;
2343       }
2344     }
2345 
2346     /* send values to its owners */
2347     for (dest=rank+1; dest<size; dest++){
2348       svalues = work + rowners_bs[dest];
2349       count   = rowners_bs[dest+1]-rowners_bs[dest];
2350       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2351     }
2352   }
2353 
2354   /* receive values */
2355   if (rank){
2356     rvalues = work;
2357     count   = rowners_bs[rank+1]-rowners_bs[rank];
2358     for (source=0; source<rank; source++){
2359       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2360       /* process values */
2361       for (i=0; i<count; i++){
2362         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2363       }
2364     }
2365   }
2366 
2367   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2368   ierr = PetscFree(work);CHKERRQ(ierr);
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatRelax_MPISBAIJ"
2374 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2375 {
2376   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2377   PetscErrorCode ierr;
2378   PetscInt       mbs=mat->mbs,bs=matin->bs;
2379   PetscScalar    *x,*b,*ptr,zero=0.0;
2380   Vec            bb1;
2381 
2382   PetscFunctionBegin;
2383   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2384   if (bs > 1)
2385     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2386 
2387   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2388     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2389       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2390       its--;
2391     }
2392 
2393     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2394     while (its--){
2395 
2396       /* lower triangular part: slvec0b = - B^T*xx */
2397       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2398 
2399       /* copy xx into slvec0a */
2400       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2401       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2402       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2403       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2404 
2405       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2406 
2407       /* copy bb into slvec1a */
2408       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2409       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2410       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2411       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2412 
2413       /* set slvec1b = 0 */
2414       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2415 
2416       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2417       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2418       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2419       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2420 
2421       /* upper triangular part: bb1 = bb1 - B*x */
2422       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2423 
2424       /* local diagonal sweep */
2425       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2426     }
2427     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2428   } else {
2429     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2430   }
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2436 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2437 {
2438   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2439   PetscErrorCode ierr;
2440   Vec            lvec1,bb1;
2441 
2442   PetscFunctionBegin;
2443   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2444   if (matin->bs > 1)
2445     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2446 
2447   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2448     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2449       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2450       its--;
2451     }
2452 
2453     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2454     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2455     while (its--){
2456       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2457 
2458       /* lower diagonal part: bb1 = bb - B^T*xx */
2459       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2460       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2461 
2462       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2463       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2464       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2465 
2466       /* upper diagonal part: bb1 = bb1 - B*x */
2467       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2468       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2469 
2470       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2471 
2472       /* diagonal sweep */
2473       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2474     }
2475     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2476     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2477   } else {
2478     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2479   }
2480   PetscFunctionReturn(0);
2481 }
2482 
2483