xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision ed1caa07d30b4bf52fa72dab33adf574bdef2af0)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements already in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
28    converts the entries to single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
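
/* Illustrative usage sketch (hypothetical caller code, not part of this file): the usual
   pattern is to freeze the nonzero structure, save the numerical values, and restore them
   later, e.g. inside a nonlinear or time-stepping loop:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);        <-- saves the values of the A and B parts
     ... change matrix values ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);     <-- copies the saved values back

   Both routines above simply forward to the sequential diagonal (A) and off-diagonal (B) blocks. */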
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
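
/* Illustrative sketch (hypothetical indices, not part of this file): since only the upper
   triangular block part is stored, callers need to supply entries with block row <= block
   column; anything strictly below the block diagonal passed to MatSetValues() is dropped here.
   For example, with bs = 1, i = 3, j = 5 and the usual MatSetValues() calling sequence,

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);    <-- kept    (row 3 <= col 5)
     ierr = MatSetValues(mat,1,&j,1,&i,&v,ADD_VALUES);CHKERRQ(ierr);    <-- ignored (row 5 >  col 3)

   both calls are legal, but only the first one changes the matrix. */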
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If NumCol = 1 then a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463  PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
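      /* blocks of B are stored only once here but appear twice (as B and B^T) in the full
         symmetric matrix, hence the factor 2 on its squared norm below */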
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
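      /* for a symmetric matrix the 1-norm and the infinity-norm coincide; both are computed
         below as the largest absolute column sum, accumulated from the A and B blocks */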
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        temporarily set the roworiented flags to PETSC_FALSE (column-oriented) and
609        restore the original flags after MatSetValues() */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any process has disassembled; if so we must
639      also disassemble ourselves, so that we may reassemble. */
640   /*
641      if nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled thus we can skip this stuff
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     }
701   }
702 
703   if (isdraw) {
704     PetscDraw       draw;
705     PetscTruth isnull;
706     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
707     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
708   }
709 
710   if (size == 1) {
711     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
712     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
713   } else {
714     /* assemble the entire matrix onto first processor. */
715     Mat         A;
716     Mat_SeqSBAIJ *Aloc;
717     Mat_SeqBAIJ *Bloc;
718     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
719     MatScalar   *a;
720 
721     /* Should this be the same type as mat? */
722     if (!rank) {
723       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
724     } else {
725       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
726     }
727     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
728     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
729     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
730 
731     /* copy over the A part */
732     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
733     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
734     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
735 
736     for (i=0; i<mbs; i++) {
737       rvals[0] = bs*(baij->rstart + i);
738       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
739       for (j=ai[i]; j<ai[i+1]; j++) {
740         col = (baij->cstart+aj[j])*bs;
741         for (k=0; k<bs; k++) {
742           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
743           col++; a += bs;
744         }
745       }
746     }
747     /* copy over the B part */
748     Bloc = (Mat_SeqBAIJ*)baij->B->data;
749     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
750     for (i=0; i<mbs; i++) {
751       rvals[0] = bs*(baij->rstart + i);
752       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
753       for (j=ai[i]; j<ai[i+1]; j++) {
754         col = baij->garray[aj[j]]*bs;
755         for (k=0; k<bs; k++) {
756           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
757           col++; a += bs;
758         }
759       }
760     }
761     ierr = PetscFree(rvals);CHKERRQ(ierr);
762     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
763     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
764     /*
765        Everyone has to call to draw the matrix since the graphics waits are
766        synchronized across all processors that share the PetscDraw object
767     */
768     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
769     if (!rank) {
770       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
771       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
772     }
773     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
774     ierr = MatDestroy(A);CHKERRQ(ierr);
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatView_MPISBAIJ"
781 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
782 {
783   PetscErrorCode ierr;
784   PetscTruth     iascii,isdraw,issocket,isbinary;
785 
786   PetscFunctionBegin;
787   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
788   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
789   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
791   if (iascii || isdraw || issocket || isbinary) {
792     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
793   } else {
794     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 #undef __FUNCT__
800 #define __FUNCT__ "MatDestroy_MPISBAIJ"
801 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
802 {
803   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
804   PetscErrorCode ierr;
805 
806   PetscFunctionBegin;
807 #if defined(PETSC_USE_LOG)
808   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
809 #endif
810   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
811   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
812   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
813   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
814   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
815 #if defined (PETSC_USE_CTABLE)
816   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
817 #else
818   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
819 #endif
820   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
821   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
822   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
823   if (baij->slvec0) {
824     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
825     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
826   }
827   if (baij->slvec1) {
828     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
829     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
830     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
831   }
832   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
833   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
834   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
835   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
836 #if defined(PETSC_USE_MAT_SINGLE)
837   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
838 #endif
839   ierr = PetscFree(baij);CHKERRQ(ierr);
840 
841   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
842   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
843   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
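/* Sketch of the one-scatter symmetric multiply implemented below (descriptive only): with the
   stored upper triangular part split into the diagonal block A and the off-diagonal block B,
   the product y = M*x is formed as

       y_local = A*x_local + B*x_ghost + (contributions B^T*x_local returned to their owners)

   slvec0 packs [x_local ; B^T*x_local]; a single forward ADD_VALUES scatter through sMvctx both
   gathers the needed ghost values of x into slvec1b and accumulates the transposed contributions
   on the owning processes, after which one multiply-add with B completes y. */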
848 #undef __FUNCT__
849 #define __FUNCT__ "MatMult_MPISBAIJ"
850 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
851 {
852   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
853   PetscErrorCode ierr;
854   PetscInt       nt,mbs=a->mbs,bs=A->bs;
855   PetscScalar    *x,*from,zero=0.0;
856 
857   PetscFunctionBegin;
858   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
859   if (nt != A->n) {
860     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
861   }
862   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
863   if (nt != A->m) {
864     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
865   }
866 
867   /* diagonal part */
868   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
869   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
870 
871   /* subdiagonal part */
872   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
873 
874   /* copy x into the vec slvec0 */
875   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
876   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
877   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
878   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
879 
880   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
883 
884   /* superdiagonal part */
885   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
886 
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
892 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
893 {
894   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
895   PetscErrorCode ierr;
896   PetscInt       nt;
897 
898   PetscFunctionBegin;
899   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
900   if (nt != A->n) {
901     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
902   }
903   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
904   if (nt != A->m) {
905     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
906   }
907 
908   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
909   /* do diagonal part */
910   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
911   /* do superdiagonal part */
912   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
913   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
914   /* do subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
916   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
917   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
918 
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
924 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
925 {
926   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       mbs=a->mbs,bs=A->bs;
929   PetscScalar    *x,*from,zero=0.0;
930 
931   PetscFunctionBegin;
932   /*
933   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
934   PetscSynchronizedFlush(A->comm);
935   */
936   /* diagonal part */
937   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
938   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
939 
940   /* subdiagonal part */
941   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
942 
943   /* copy x into the vec slvec0 */
944   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
945   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
946   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
947   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
948 
949   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
950   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
952 
953   /* superdiagonal part */
954   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
955 
956   PetscFunctionReturn(0);
957 }
958 
959 #undef __FUNCT__
960 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
961 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
962 {
963   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
964   PetscErrorCode ierr;
965 
966   PetscFunctionBegin;
967   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
968   /* do diagonal part */
969   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
970   /* do superdiagonal part */
971   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
972   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
973 
974   /* do subdiagonal part */
975   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
976   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
977   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
978 
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
984 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
985 {
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
995 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
996 {
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 /*
1005   This only works correctly for square matrices where the subblock A->A is the
1006    diagonal block
1007 */
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1010 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1011 {
1012   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1013   PetscErrorCode ierr;
1014 
1015   PetscFunctionBegin;
1016   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1017   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatScale_MPISBAIJ"
1023 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1024 {
1025   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1026   PetscErrorCode ierr;
1027 
1028   PetscFunctionBegin;
1029   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1030   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1036 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1037 {
1038   PetscFunctionBegin;
1039   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1045 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1046 {
1047   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1048 
1049   PetscFunctionBegin;
1050   if (!baij->getrowactive) {
1051     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1052   }
1053   baij->getrowactive = PETSC_FALSE;
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1059 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1060 {
1061   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1066   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1072 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1073 {
1074   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1075   Mat            A = a->A,B = a->B;
1076   PetscErrorCode ierr;
1077   PetscReal      isend[5],irecv[5];
1078 
1079   PetscFunctionBegin;
1080   info->block_size     = (PetscReal)matin->bs;
1081   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1082   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1083   isend[3] = info->memory;  isend[4] = info->mallocs;
1084   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1085   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1086   isend[3] += info->memory;  isend[4] += info->mallocs;
1087   if (flag == MAT_LOCAL) {
1088     info->nz_used      = isend[0];
1089     info->nz_allocated = isend[1];
1090     info->nz_unneeded  = isend[2];
1091     info->memory       = isend[3];
1092     info->mallocs      = isend[4];
1093   } else if (flag == MAT_GLOBAL_MAX) {
1094     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1095     info->nz_used      = irecv[0];
1096     info->nz_allocated = irecv[1];
1097     info->nz_unneeded  = irecv[2];
1098     info->memory       = irecv[3];
1099     info->mallocs      = irecv[4];
1100   } else if (flag == MAT_GLOBAL_SUM) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else {
1108     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1109   }
1110   info->rows_global       = (PetscReal)A->M;
1111   info->columns_global    = (PetscReal)A->N;
1112   info->rows_local        = (PetscReal)A->m;
1113   info->columns_local     = (PetscReal)A->N;
1114   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1115   info->fill_ratio_needed = 0;
1116   info->factor_mallocs    = 0;
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1122 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1123 {
1124   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   switch (op) {
1129   case MAT_NO_NEW_NONZERO_LOCATIONS:
1130   case MAT_YES_NEW_NONZERO_LOCATIONS:
1131   case MAT_COLUMNS_UNSORTED:
1132   case MAT_COLUMNS_SORTED:
1133   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1134   case MAT_KEEP_ZEROED_ROWS:
1135   case MAT_NEW_NONZERO_LOCATION_ERR:
1136     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1137     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1138     break;
1139   case MAT_ROW_ORIENTED:
1140     a->roworiented = PETSC_TRUE;
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_ROWS_SORTED:
1145   case MAT_ROWS_UNSORTED:
1146   case MAT_YES_NEW_DIAGONALS:
1147     ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
1148     break;
1149   case MAT_COLUMN_ORIENTED:
1150     a->roworiented = PETSC_FALSE;
1151     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1152     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1153     break;
1154   case MAT_IGNORE_OFF_PROC_ENTRIES:
1155     a->donotstash = PETSC_TRUE;
1156     break;
1157   case MAT_NO_NEW_DIAGONALS:
1158     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1159   case MAT_USE_HASH_TABLE:
1160     a->ht_flag = PETSC_TRUE;
1161     break;
1162   case MAT_NOT_SYMMETRIC:
1163   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1164   case MAT_HERMITIAN:
1165     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1166   case MAT_SYMMETRIC:
1167   case MAT_STRUCTURALLY_SYMMETRIC:
1168   case MAT_NOT_HERMITIAN:
1169   case MAT_SYMMETRY_ETERNAL:
1170   case MAT_NOT_SYMMETRY_ETERNAL:
1171     break;
1172   default:
1173     SETERRQ(PETSC_ERR_SUP,"unknown option");
1174   }
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1180 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1181 {
1182   PetscErrorCode ierr;
1183   PetscFunctionBegin;
1184   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1190 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1191 {
1192   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1193   Mat            a=baij->A, b=baij->B;
1194   PetscErrorCode ierr;
1195   PetscInt       nv,m,n;
1196   PetscTruth     flg;
1197 
1198   PetscFunctionBegin;
1199   if (ll != rr){
1200     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1201     if (!flg)
1202       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1203   }
1204   if (!ll) PetscFunctionReturn(0);
1205 
1206   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1207   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1208 
1209   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1210   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1211 
1212   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1213 
1214   /* left diagonalscale the off-diagonal part */
1215   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1216 
1217   /* scale the diagonal part */
1218   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1219 
1220   /* right diagonalscale the off-diagonal part */
1221   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1222   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1228 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1229 {
1230   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1231   MPI_Comm          comm = A->comm;
1232   static PetscTruth called = PETSC_FALSE;
1233   PetscErrorCode    ierr;
1234 
1235   PetscFunctionBegin;
1236   if (!a->rank) {
1237     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1238   }
1239   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1240   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1241   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1247 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1248 {
1249   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatEqual_MPISBAIJ"
1261 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1262 {
1263   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1264   Mat            a,b,c,d;
1265   PetscTruth     flg;
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269   a = matA->A; b = matA->B;
1270   c = matB->A; d = matB->B;
1271 
1272   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1273   if (flg) {
1274     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1275   }
1276   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1282 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1283 {
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1293 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1294 {
1295   PetscErrorCode ierr;
1296   PetscInt       i;
1297   PetscTruth     flg;
1298 
1299   PetscFunctionBegin;
1300   for (i=0; i<n; i++) {
1301     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1302     if (!flg) {
1303       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1304     }
1305   }
1306   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 
1311 /* -------------------------------------------------------------------*/
1312 static struct _MatOps MatOps_Values = {
1313        MatSetValues_MPISBAIJ,
1314        MatGetRow_MPISBAIJ,
1315        MatRestoreRow_MPISBAIJ,
1316        MatMult_MPISBAIJ,
1317 /* 4*/ MatMultAdd_MPISBAIJ,
1318        MatMultTranspose_MPISBAIJ,
1319        MatMultTransposeAdd_MPISBAIJ,
1320        0,
1321        0,
1322        0,
1323 /*10*/ 0,
1324        0,
1325        0,
1326        MatRelax_MPISBAIJ,
1327        MatTranspose_MPISBAIJ,
1328 /*15*/ MatGetInfo_MPISBAIJ,
1329        MatEqual_MPISBAIJ,
1330        MatGetDiagonal_MPISBAIJ,
1331        MatDiagonalScale_MPISBAIJ,
1332        MatNorm_MPISBAIJ,
1333 /*20*/ MatAssemblyBegin_MPISBAIJ,
1334        MatAssemblyEnd_MPISBAIJ,
1335        0,
1336        MatSetOption_MPISBAIJ,
1337        MatZeroEntries_MPISBAIJ,
1338 /*25*/ 0,
1339        0,
1340        0,
1341        0,
1342        0,
1343 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1344        0,
1345        0,
1346        0,
1347        0,
1348 /*35*/ MatDuplicate_MPISBAIJ,
1349        0,
1350        0,
1351        0,
1352        0,
1353 /*40*/ 0,
1354        MatGetSubMatrices_MPISBAIJ,
1355        MatIncreaseOverlap_MPISBAIJ,
1356        MatGetValues_MPISBAIJ,
1357        0,
1358 /*45*/ MatPrintHelp_MPISBAIJ,
1359        MatScale_MPISBAIJ,
1360        0,
1361        0,
1362        0,
1363 /*50*/ 0,
1364        0,
1365        0,
1366        0,
1367        0,
1368 /*55*/ 0,
1369        0,
1370        MatSetUnfactored_MPISBAIJ,
1371        0,
1372        MatSetValuesBlocked_MPISBAIJ,
1373 /*60*/ 0,
1374        0,
1375        0,
1376        MatGetPetscMaps_Petsc,
1377        0,
1378 /*65*/ 0,
1379        0,
1380        0,
1381        0,
1382        0,
1383 /*70*/ MatGetRowMax_MPISBAIJ,
1384        0,
1385        0,
1386        0,
1387        0,
1388 /*75*/ 0,
1389        0,
1390        0,
1391        0,
1392        0,
1393 /*80*/ 0,
1394        0,
1395        0,
1396        0,
1397        MatLoad_MPISBAIJ,
1398 /*85*/ 0,
1399        0,
1400        0,
1401        0,
1402        0,
1403 /*90*/ 0,
1404        0,
1405        0,
1406        0,
1407        0,
1408 /*95*/ 0,
1409        0,
1410        0,
1411        0};
1412 
1413 
1414 EXTERN_C_BEGIN
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1417 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1418 {
1419   PetscFunctionBegin;
1420   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1421   *iscopy = PETSC_FALSE;
1422   PetscFunctionReturn(0);
1423 }
1424 EXTERN_C_END
1425 
1426 EXTERN_C_BEGIN
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1429 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1430 {
1431   Mat_MPISBAIJ   *b;
1432   PetscErrorCode ierr;
1433   PetscInt       i,mbs,Mbs;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1437 
1438   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1439   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1440   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1441   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1442   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1443   if (d_nnz) {
1444     for (i=0; i<B->m/bs; i++) {
1445       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1446     }
1447   }
1448   if (o_nnz) {
1449     for (i=0; i<B->m/bs; i++) {
1450       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1451     }
1452   }
1453   B->preallocated = PETSC_TRUE;
1454   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1455   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1456   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1457   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1458 
1459   b   = (Mat_MPISBAIJ*)B->data;
1460   mbs = B->m/bs;
1461   Mbs = B->M/bs;
1462   if (mbs*bs != B->m) {
1463     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1464   }
1465 
1466   B->bs  = bs;
1467   b->bs2 = bs*bs;
1468   b->mbs = mbs;
1469   b->nbs = mbs;
1470   b->Mbs = Mbs;
1471   b->Nbs = Mbs;
1472 
1473   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1474   b->rowners[0]    = 0;
1475   for (i=2; i<=b->size; i++) {
1476     b->rowners[i] += b->rowners[i-1];
1477   }
1478   b->rstart    = b->rowners[b->rank];
1479   b->rend      = b->rowners[b->rank+1];
1480   b->cstart    = b->rstart;
1481   b->cend      = b->rend;
1482   for (i=0; i<=b->size; i++) {
1483     b->rowners_bs[i] = b->rowners[i]*bs;
1484   }
1485   b->rstart_bs = b->rstart*bs;
1486   b->rend_bs   = b->rend*bs;
1487 
1488   b->cstart_bs = b->cstart*bs;
1489   b->cend_bs   = b->cend*bs;
1490 
1491   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1492   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1493   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1494   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1495 
1496   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1497   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1498   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1499   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1500 
1501   /* build cache for off-processor entries generated during assembly */
1502   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1503 
1504   PetscFunctionReturn(0);
1505 }
1506 EXTERN_C_END
1507 
1508 /*MC
1509    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1510    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1511 
1512    Options Database Keys:
1513 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1514 
1515   Level: beginner
1516 
1517 .seealso: MatCreateMPISBAIJ
1518 M*/
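/*
   Usage sketch (illustrative only, not part of the library): one way to obtain an
   "mpisbaij" matrix explicitly.  The sizes N, bs and the preallocation estimates
   d_nz, o_nz are placeholder values assumed to be defined by the caller.

      Mat            C;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,&C);CHKERRQ(ierr);
      ierr = MatSetType(C,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(C,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, MatSetFromOptions() honors -mat_type mpisbaij as noted above.
*/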
1519 
1520 EXTERN_C_BEGIN
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatCreate_MPISBAIJ"
1523 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1524 {
1525   Mat_MPISBAIJ   *b;
1526   PetscErrorCode ierr;
1527   PetscTruth     flg;
1528 
1529   PetscFunctionBegin;
1530 
1531   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1532   B->data = (void*)b;
1533   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1534 
1535   B->ops->destroy    = MatDestroy_MPISBAIJ;
1536   B->ops->view       = MatView_MPISBAIJ;
1537   B->mapping    = 0;
1538   B->factor     = 0;
1539   B->assembled  = PETSC_FALSE;
1540 
1541   B->insertmode = NOT_SET_VALUES;
1542   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1543   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1544 
1545   /* build local table of row and column ownerships */
1546   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1547   b->cowners    = b->rowners + b->size + 2;
1548   b->rowners_bs = b->cowners + b->size + 2;
1549   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1550 
1551   /* build cache for off-processor entries generated during assembly */
1552   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1553   b->donotstash  = PETSC_FALSE;
1554   b->colmap      = PETSC_NULL;
1555   b->garray      = PETSC_NULL;
1556   b->roworiented = PETSC_TRUE;
1557 
1558 #if defined(PETSC_USE_MAT_SINGLE)
1559   /* stuff for MatSetValues_XXX in single precision */
1560   b->setvalueslen     = 0;
1561   b->setvaluescopy    = PETSC_NULL;
1562 #endif
1563 
1564   /* stuff used in block assembly */
1565   b->barray       = 0;
1566 
1567   /* stuff used for matrix vector multiply */
1568   b->lvec         = 0;
1569   b->Mvctx        = 0;
1570   b->slvec0       = 0;
1571   b->slvec0b      = 0;
1572   b->slvec1       = 0;
1573   b->slvec1a      = 0;
1574   b->slvec1b      = 0;
1575   b->sMvctx       = 0;
1576 
1577   /* stuff for MatGetRow() */
1578   b->rowindices   = 0;
1579   b->rowvalues    = 0;
1580   b->getrowactive = PETSC_FALSE;
1581 
1582   /* hash table stuff */
1583   b->ht           = 0;
1584   b->hd           = 0;
1585   b->ht_size      = 0;
1586   b->ht_flag      = PETSC_FALSE;
1587   b->ht_fact      = 0;
1588   b->ht_total_ct  = 0;
1589   b->ht_insert_ct = 0;
1590 
1591   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1592   if (flg) {
1593     PetscReal fact = 1.39;
1594     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1595     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1596     if (fact <= 1.0) fact = 1.39;
1597     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1598     ierr = PetscLogInfo((0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact));CHKERRQ(ierr);
1599   }
1600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1601                                      "MatStoreValues_MPISBAIJ",
1602                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1604                                      "MatRetrieveValues_MPISBAIJ",
1605                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1607                                      "MatGetDiagonalBlock_MPISBAIJ",
1608                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1610                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1611                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1612   B->symmetric                  = PETSC_TRUE;
1613   B->structurally_symmetric     = PETSC_TRUE;
1614   B->symmetric_set              = PETSC_TRUE;
1615   B->structurally_symmetric_set = PETSC_TRUE;
1616   PetscFunctionReturn(0);
1617 }
1618 EXTERN_C_END
1619 
1620 /*MC
1621    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1622 
1623    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1624    and MATMPISBAIJ otherwise.
1625 
1626    Options Database Keys:
1627 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1628 
1629   Level: beginner
1630 
1631 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1632 M*/
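/*
   Usage sketch (illustrative only): requesting the generic "sbaij" type.  As
   implemented in MatCreate_SBAIJ() below, the type resolves to MATSEQSBAIJ on a
   one-process communicator and to MATMPISBAIJ otherwise.  M is a placeholder
   global size assumed to be set by the caller.

      Mat            C;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,&C);CHKERRQ(ierr);
      ierr = MatSetType(C,MATSBAIJ);CHKERRQ(ierr);
*/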
1633 
1634 EXTERN_C_BEGIN
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatCreate_SBAIJ"
1637 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1638 {
1639   PetscErrorCode ierr;
1640   PetscMPIInt    size;
1641 
1642   PetscFunctionBegin;
1643   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1644   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1645   if (size == 1) {
1646     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1647   } else {
1648     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1649   }
1650   PetscFunctionReturn(0);
1651 }
1652 EXTERN_C_END
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1656 /*@C
1657    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1658    the user should preallocate the matrix storage by setting the parameters
1659    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1660    performance can be increased by more than a factor of 50.
1661 
1662    Collective on Mat
1663 
1664    Input Parameters:
1665 +  A - the matrix
1666 .  bs   - size of block
1667 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1668            submatrix  (same for all local rows)
1669 .  d_nnz - array containing the number of block nonzeros in the various block rows
1670            in the upper triangular and diagonal part of the diagonal portion of the local
1671            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1672            room for the diagonal entry even if it is zero.
1673 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1674            submatrix (same for all local rows).
1675 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1676            off-diagonal portion of the local submatrix (possibly different for
1677            each block row) or PETSC_NULL.
1678 
1679 
1680    Options Database Keys:
1681 +   -mat_no_unroll - uses code that does not unroll the loops in the
1682                      block calculations (much slower)
1683 -   -mat_block_size - size of the blocks to use
1684 
1685    Notes:
1686 
1687    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1688    then it must be used on all processors that share the object for that argument.
1689 
1690    If the *_nnz parameter is given then the *_nz parameter is ignored
1691 
1692    Storage Information:
1693    For a square global matrix we define each processor's diagonal portion
1694    to be its local rows and the corresponding columns (a square submatrix);
1695    each processor's off-diagonal portion encompasses the remainder of the
1696    local matrix (a rectangular submatrix).
1697 
1698    The user can specify preallocated storage for the diagonal part of
1699    the local submatrix with either d_nz or d_nnz (not both).  Set
1700    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1701    memory allocation.  Likewise, specify preallocated storage for the
1702    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1703 
1704    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1705    the figure below we depict these three local rows and all columns (0-11).
1706 
1707 .vb
1708            0 1 2 3 4 5 6 7 8 9 10 11
1709           -------------------
1710    row 3  |  o o o d d d o o o o o o
1711    row 4  |  o o o d d d o o o o o o
1712    row 5  |  o o o d d d o o o o o o
1713           -------------------
1714 .ve
1715 
1716    Thus, any entries in the d locations are stored in the d (diagonal)
1717    submatrix, and any entries in the o locations are stored in the
1718    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1719    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1720 
1721    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1722    plus the diagonal part of the d matrix,
1723    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1724    In general, for PDE problems in which most nonzeros are near the diagonal,
1725    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1726    or you will get TERRIBLE performance; see the users' manual chapter on
1727    matrices.
1728 
1729    Level: intermediate
1730 
1731 .keywords: matrix, block, aij, compressed row, sparse, parallel
1732 
1733 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1734 @*/
1735 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1736 {
1737   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1738 
1739   PetscFunctionBegin;
1740   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1741   if (f) {
1742     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1743   }
1744   PetscFunctionReturn(0);
1745 }
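/*
   Preallocation sketch (hedged, illustrative only): filling d_nnz/o_nnz for a
   block-tridiagonal matrix before calling the routine above.  Because only the
   upper triangle is stored, each local block row needs its diagonal block plus
   at most one super-diagonal block; for the last local block row that
   super-diagonal block belongs to the off-diagonal part (except on the last
   process, where it does not exist and o_nnz would be 0).  B, bs and mbs (the
   number of local block rows) are assumed to be set up by the caller.

      PetscInt       i,*d_nnz,*o_nnz;
      PetscErrorCode ierr;

      ierr  = PetscMalloc(2*mbs*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
      o_nnz = d_nnz + mbs;
      for (i=0; i<mbs; i++) { d_nnz[i] = 2; o_nnz[i] = 0; }
      d_nnz[mbs-1] = 1;          (diagonal block only)
      o_nnz[mbs-1] = 1;          (super-diagonal block owned by the next process)
      ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      ierr = PetscFree(d_nnz);CHKERRQ(ierr);
*/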
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatCreateMPISBAIJ"
1749 /*@C
1750    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1751    (block compressed row).  For good matrix assembly performance
1752    the user should preallocate the matrix storage by setting the parameters
1753    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1754    performance can be increased by more than a factor of 50.
1755 
1756    Collective on MPI_Comm
1757 
1758    Input Parameters:
1759 +  comm - MPI communicator
1760 .  bs   - size of block
1761 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1762            This value should be the same as the local size used in creating the
1763            y vector for the matrix-vector product y = Ax.
1764 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1765            This value should be the same as the local size used in creating the
1766            x vector for the matrix-vector product y = Ax.
1767 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1768 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1769 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1770            submatrix  (same for all local rows)
1771 .  d_nnz - array containing the number of block nonzeros in the various block rows
1772            in the upper triangular and diagonal part of the diagonal portion of the local
1773            submatrix (possibly different for each block row) or PETSC_NULL.
1774            You must leave room for the diagonal entry even if it is zero.
1775 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1776            submatrix (same for all local rows).
1777 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1778            off-diagonal portion of the local submatrix (possibly different for
1779            each block row) or PETSC_NULL.
1780 
1781    Output Parameter:
1782 .  A - the matrix
1783 
1784    Options Database Keys:
1785 +   -mat_no_unroll - uses code that does not unroll the loops in the
1786                      block calculations (much slower)
1787 .   -mat_block_size - size of the blocks to use
1788 -   -mat_mpi - use the parallel matrix data structures even on one processor
1789                (defaults to using SeqSBAIJ format on one processor)
1790 
1791    Notes:
1792    The number of rows and columns must be divisible by blocksize.
1793 
1794    The user MUST specify either the local or global matrix dimensions
1795    (possibly both).
1796 
1797    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1798    then it must be used on all processors that share the object for that argument.
1799 
1800    If the *_nnz parameter is given then the *_nz parameter is ignored
1801 
1802    Storage Information:
1803    For a square global matrix we define each processor's diagonal portion
1804    to be its local rows and the corresponding columns (a square submatrix);
1805    each processor's off-diagonal portion encompasses the remainder of the
1806    local matrix (a rectangular submatrix).
1807 
1808    The user can specify preallocated storage for the diagonal part of
1809    the local submatrix with either d_nz or d_nnz (not both).  Set
1810    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1811    memory allocation.  Likewise, specify preallocated storage for the
1812    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1813 
1814    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1815    the figure below we depict these three local rows and all columns (0-11).
1816 
1817 .vb
1818            0 1 2 3 4 5 6 7 8 9 10 11
1819           -------------------
1820    row 3  |  o o o d d d o o o o o o
1821    row 4  |  o o o d d d o o o o o o
1822    row 5  |  o o o d d d o o o o o o
1823           -------------------
1824 .ve
1825 
1826    Thus, any entries in the d locations are stored in the d (diagonal)
1827    submatrix, and any entries in the o locations are stored in the
1828    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1829    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1830 
1831    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1832    plus the diagonal part of the d matrix,
1833    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1834    In general, for PDE problems in which most nonzeros are near the diagonal,
1835    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1836    or you will get TERRIBLE performance; see the users' manual chapter on
1837    matrices.
1838 
1839    Level: intermediate
1840 
1841 .keywords: matrix, block, aij, compressed row, sparse, parallel
1842 
1843 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1844 @*/
1845 
1846 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1847 {
1848   PetscErrorCode ierr;
1849   PetscMPIInt    size;
1850 
1851   PetscFunctionBegin;
1852   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1854   if (size > 1) {
1855     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1856     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1857   } else {
1858     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1859     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1860   }
1861   PetscFunctionReturn(0);
1862 }
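/*
   Usage sketch (illustrative only): the convenience routine above combines
   creation, type selection and preallocation.  Here bs=2 and the constant
   estimates d_nz=3, o_nz=1 are placeholder values; M is a global size assumed
   to be divisible by bs.

      Mat            C;
      PetscErrorCode ierr;

      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                               3,PETSC_NULL,1,PETSC_NULL,&C);CHKERRQ(ierr);
      (insert entries in the upper triangle with MatSetValues() or
       MatSetValuesBlocked(), then assemble)
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/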
1863 
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1867 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1868 {
1869   Mat            mat;
1870   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1871   PetscErrorCode ierr;
1872   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1873   PetscScalar    *array;
1874 
1875   PetscFunctionBegin;
1876   *newmat       = 0;
1877   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1878   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1879   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1880 
1881   mat->factor       = matin->factor;
1882   mat->preallocated = PETSC_TRUE;
1883   mat->assembled    = PETSC_TRUE;
1884   mat->insertmode   = NOT_SET_VALUES;
1885 
1886   a = (Mat_MPISBAIJ*)mat->data;
1887   mat->bs  = matin->bs;
1888   a->bs2   = oldmat->bs2;
1889   a->mbs   = oldmat->mbs;
1890   a->nbs   = oldmat->nbs;
1891   a->Mbs   = oldmat->Mbs;
1892   a->Nbs   = oldmat->Nbs;
1893 
1894   a->rstart       = oldmat->rstart;
1895   a->rend         = oldmat->rend;
1896   a->cstart       = oldmat->cstart;
1897   a->cend         = oldmat->cend;
1898   a->size         = oldmat->size;
1899   a->rank         = oldmat->rank;
1900   a->donotstash   = oldmat->donotstash;
1901   a->roworiented  = oldmat->roworiented;
1902   a->rowindices   = 0;
1903   a->rowvalues    = 0;
1904   a->getrowactive = PETSC_FALSE;
1905   a->barray       = 0;
1906   a->rstart_bs    = oldmat->rstart_bs;
1907   a->rend_bs      = oldmat->rend_bs;
1908   a->cstart_bs    = oldmat->cstart_bs;
1909   a->cend_bs      = oldmat->cend_bs;
1910 
1911   /* hash table stuff */
1912   a->ht           = 0;
1913   a->hd           = 0;
1914   a->ht_size      = 0;
1915   a->ht_flag      = oldmat->ht_flag;
1916   a->ht_fact      = oldmat->ht_fact;
1917   a->ht_total_ct  = 0;
1918   a->ht_insert_ct = 0;
1919 
1920   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1921   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1922   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1923   if (oldmat->colmap) {
1924 #if defined (PETSC_USE_CTABLE)
1925     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1926 #else
1927     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1928     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1929     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1930 #endif
1931   } else a->colmap = 0;
1932 
1933   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1934     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1935     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1936     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1937   } else a->garray = 0;
1938 
1939   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1940   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1941   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1942   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1943 
1944   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1945   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1946   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1947   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1948 
1949   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1950   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1951   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1952   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1953   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1954   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1955   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1956   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1957   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1958   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1959   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
1960   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
1961   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
1962 
1963   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1964   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1965   a->sMvctx = oldmat->sMvctx;
1966   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
1967 
1968   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1969   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1970   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1971   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1972   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1973   *newmat = mat;
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 #include "petscsys.h"
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatLoad_MPISBAIJ"
1981 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1982 {
1983   Mat            A;
1984   PetscErrorCode ierr;
1985   PetscInt       i,nz,j,rstart,rend;
1986   PetscScalar    *vals,*buf;
1987   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1988   MPI_Status     status;
1989   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
1990   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1991   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
1992   PetscInt       bs=1,Mbs,mbs,extra_rows;
1993   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1994   PetscInt       dcount,kmax,k,nzcount,tmp;
1995   int            fd;
1996 
1997   PetscFunctionBegin;
1998   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1999 
2000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2001   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2002   if (!rank) {
2003     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2004     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2005     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2006     if (header[3] < 0) {
2007       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2008     }
2009   }
2010 
2011   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2012   M = header[1]; N = header[2];
2013 
2014   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2015 
2016   /*
2017      This code adds extra rows to make sure the number of rows is
2018      divisible by the blocksize
2019   */
2020   Mbs        = M/bs;
2021   extra_rows = bs - M + bs*(Mbs);
2022   if (extra_rows == bs) extra_rows = 0;
2023   else                  Mbs++;
2024   if (extra_rows && !rank) {
2025     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2026   }
2027 
2028   /* determine ownership of all rows */
2029   mbs        = Mbs/size + ((Mbs % size) > rank);
2030   m          = mbs*bs;
2031   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2032   browners   = rowners + size + 1;
2033   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2034   rowners[0] = 0;
2035   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2036   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2037   rstart = rowners[rank];
2038   rend   = rowners[rank+1];
2039 
2040   /* distribute row lengths to all processors */
2041   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2042   if (!rank) {
2043     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2044     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2045     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2046     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2047     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2048     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2049     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2050   } else {
2051     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2052   }
2053 
2054   if (!rank) {   /* procs[0] */
2055     /* calculate the number of nonzeros on each processor */
2056     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2057     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2058     for (i=0; i<size; i++) {
2059       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2060         procsnz[i] += rowlengths[j];
2061       }
2062     }
2063     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2064 
2065     /* determine max buffer needed and allocate it */
2066     maxnz = 0;
2067     for (i=0; i<size; i++) {
2068       maxnz = PetscMax(maxnz,procsnz[i]);
2069     }
2070     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2071 
2072     /* read in my part of the matrix column indices  */
2073     nz     = procsnz[0];
2074     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2075     mycols = ibuf;
2076     if (size == 1)  nz -= extra_rows;
2077     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2078     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2079 
2080     /* read in everyone else's column indices (except the last proc's) and ship them off */
2081     for (i=1; i<size-1; i++) {
2082       nz   = procsnz[i];
2083       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2084       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2085     }
2086     /* read in the stuff for the last proc */
2087     if (size != 1) {
2088       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2089       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2090       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2091       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2092     }
2093     ierr = PetscFree(cols);CHKERRQ(ierr);
2094   } else {  /* procs[i], i>0 */
2095     /* determine buffer space needed for message */
2096     nz = 0;
2097     for (i=0; i<m; i++) {
2098       nz += locrowlens[i];
2099     }
2100     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2101     mycols = ibuf;
2102     /* receive message of column indices*/
2103     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2104     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2105     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2106   }
2107 
2108   /* loop over local rows, determining number of off diagonal entries */
2109   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2110   odlens   = dlens + (rend-rstart);
2111   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2112   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2113   masked1  = mask    + Mbs;
2114   masked2  = masked1 + Mbs;
2115   rowcount = 0; nzcount = 0;
2116   for (i=0; i<mbs; i++) {
2117     dcount  = 0;
2118     odcount = 0;
2119     for (j=0; j<bs; j++) {
2120       kmax = locrowlens[rowcount];
2121       for (k=0; k<kmax; k++) {
2122         tmp = mycols[nzcount++]/bs; /* block col. index */
2123         if (!mask[tmp]) {
2124           mask[tmp] = 1;
2125           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2126           else masked1[dcount++] = tmp; /* entry in diag portion */
2127         }
2128       }
2129       rowcount++;
2130     }
2131 
2132     dlens[i]  = dcount;  /* d_nzz[i] */
2133     odlens[i] = odcount; /* o_nzz[i] */
2134 
2135     /* zero out the mask elements we set */
2136     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2137     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2138   }
2139 
2140   /* create our matrix */
2141   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2142   ierr = MatSetType(A,type);CHKERRQ(ierr);
2143   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2144   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2145 
2146   if (!rank) {
2147     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2148     /* read in my part of the matrix numerical values  */
2149     nz = procsnz[0];
2150     vals = buf;
2151     mycols = ibuf;
2152     if (size == 1)  nz -= extra_rows;
2153     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2154     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2155 
2156     /* insert into matrix */
2157     jj      = rstart*bs;
2158     for (i=0; i<m; i++) {
2159       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2160       mycols += locrowlens[i];
2161       vals   += locrowlens[i];
2162       jj++;
2163     }
2164 
2165     /* read in other processors (except the last one) and ship out */
2166     for (i=1; i<size-1; i++) {
2167       nz   = procsnz[i];
2168       vals = buf;
2169       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2170       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2171     }
2172     /* the last proc */
2173     if (size != 1){
2174       nz   = procsnz[i] - extra_rows;
2175       vals = buf;
2176       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2177       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2178       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2179     }
2180     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2181 
2182   } else {
2183     /* receive numeric values */
2184     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2185 
2186     /* receive message of values*/
2187     vals   = buf;
2188     mycols = ibuf;
2189     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2190     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2191     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2192 
2193     /* insert into matrix */
2194     jj      = rstart*bs;
2195     for (i=0; i<m; i++) {
2196       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2197       mycols += locrowlens[i];
2198       vals   += locrowlens[i];
2199       jj++;
2200     }
2201   }
2202 
2203   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2204   ierr = PetscFree(buf);CHKERRQ(ierr);
2205   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2206   ierr = PetscFree(rowners);CHKERRQ(ierr);
2207   ierr = PetscFree(dlens);CHKERRQ(ierr);
2208   ierr = PetscFree(mask);CHKERRQ(ierr);
2209   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2210   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   *newmat = A;
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2217 /*XXXXX@
2218    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2219 
2220    Input Parameters:
2221 .  mat  - the matrix
2222 .  fact - factor
2223 
2224    Collective on Mat
2225 
2226    Level: advanced
2227 
2228   Notes:
2229    This can also be set by the command line option: -mat_use_hash_table fact
2230 
2231 .keywords: matrix, hashtable, factor, HT
2232 
2233 .seealso: MatSetOption()
2234 @XXXXX*/
2235 
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2239 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2240 {
2241   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2242   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2243   PetscReal      atmp;
2244   PetscReal      *work,*svalues,*rvalues;
2245   PetscErrorCode ierr;
2246   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2247   PetscMPIInt    rank,size;
2248   PetscInt       *rowners_bs,dest,count,source;
2249   PetscScalar    *va;
2250   MatScalar      *ba;
2251   MPI_Status     stat;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2255   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2256 
2257   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2258   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2259 
2260   bs   = A->bs;
2261   mbs  = a->mbs;
2262   Mbs  = a->Mbs;
2263   ba   = b->a;
2264   bi   = b->i;
2265   bj   = b->j;
2266 
2267   /* find ownerships */
2268   rowners_bs = a->rowners_bs;
2269 
2270   /* each proc creates an array to be distributed */
2271   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2272   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2273 
2274   /* row_max for B */
2275   if (rank != size-1){
2276     for (i=0; i<mbs; i++) {
2277       ncols = bi[1] - bi[0]; bi++;
2278       brow  = bs*i;
2279       for (j=0; j<ncols; j++){
2280         bcol = bs*(*bj);
2281         for (kcol=0; kcol<bs; kcol++){
2282           col = bcol + kcol;                 /* local col index */
2283           col += rowners_bs[rank+1];      /* global col index */
2284           for (krow=0; krow<bs; krow++){
2285             atmp = PetscAbsScalar(*ba); ba++;
2286             row = brow + krow;    /* local row index */
2287             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2288             if (work[col] < atmp) work[col] = atmp;
2289           }
2290         }
2291         bj++;
2292       }
2293     }
2294 
2295     /* send values to its owners */
2296     for (dest=rank+1; dest<size; dest++){
2297       svalues = work + rowners_bs[dest];
2298       count   = rowners_bs[dest+1]-rowners_bs[dest];
2299       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2300     }
2301   }
2302 
2303   /* receive values */
2304   if (rank){
2305     rvalues = work;
2306     count   = rowners_bs[rank+1]-rowners_bs[rank];
2307     for (source=0; source<rank; source++){
2308       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2309       /* process values */
2310       for (i=0; i<count; i++){
2311         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2312       }
2313     }
2314   }
2315 
2316   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2317   ierr = PetscFree(work);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatRelax_MPISBAIJ"
2323 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2324 {
2325   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2326   PetscErrorCode ierr;
2327   PetscInt       mbs=mat->mbs,bs=matin->bs;
2328   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2329   Vec            bb1;
2330 
2331   PetscFunctionBegin;
2332   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2333   if (bs > 1)
2334     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2335 
2336   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2337     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2338       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2339       its--;
2340     }
2341 
2342     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2343     while (its--){
2344 
2345       /* lower triangular part: slvec0b = - B^T*xx */
2346       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2347 
2348       /* copy xx into slvec0a */
2349       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2350       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2351       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2352       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2353 
2354       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2355 
2356       /* copy bb into slvec1a */
2357       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2358       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2359       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2360       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2361 
2362       /* set slvec1b = 0 */
2363       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2364 
2365       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2366       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2367       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2368       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2369 
2370       /* upper triangular part: bb1 = bb1 - B*x */
2371       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2372 
2373       /* local diagonal sweep */
2374       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2375     }
2376     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2377   } else {
2378     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2385 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2386 {
2387   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2388   PetscErrorCode ierr;
2389   PetscScalar    mone=-1.0;
2390   Vec            lvec1,bb1;
2391 
2392   PetscFunctionBegin;
2393   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2394   if (matin->bs > 1)
2395     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2396 
2397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2398     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2399       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2400       its--;
2401     }
2402 
2403     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2404     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2405     while (its--){
2406       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2407 
2408       /* lower diagonal part: bb1 = bb - B^T*xx */
2409       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2410       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2411 
2412       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2413       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2414       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2415 
2416       /* upper diagonal part: bb1 = bb1 - B*x */
2417       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2418       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2419 
2420       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2421 
2422       /* diagonal sweep */
2423       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2424     }
2425     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2426     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2427   } else {
2428     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433