xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b60d1aa9dffe7f0839cca942e8a5760aa5de1720)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
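/*
   A minimal caller-side sketch (not part of this file; error handling trimmed):
   MatStoreValues()/MatRetrieveValues() let an application with a frozen nonzero
   pattern save the numerical values, recompute them, and restore the saved copy.
   Per the public API, new nonzero locations must be disallowed first.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);       save current values
     ierr = MatZeroEntries(A);CHKERRQ(ierr);       ... recompute values ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);    restore the saved values
*/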
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
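/*
   Both macros above share one pattern: a short binary search narrows the range
   of candidate block columns to at most a few entries, a linear scan finishes
   the lookup, and on a miss the tail of the row is shifted up one block to open
   an insertion slot.  A standalone sketch of that search-or-insert step on a
   plain sorted integer row (illustrative only; memmove is from <string.h>):

     static int find_or_insert(int *cols, int *len, int cap, int bcol)
     {
       int low = 0, high = *len, t, i;
       while (high - low > 3) {                  binary search to <= 3 entries
         t = (low + high) / 2;
         if (cols[t] > bcol) high = t;
         else                low  = t;
       }
       for (i = low; i < high; i++) {
         if (cols[i] == bcol) return i;          hit: update value in place
         if (cols[i] > bcol) break;
       }
       if (*len >= cap) return -1;               caller must reallocate first
       memmove(cols+i+1,cols+i,(*len-i)*sizeof(int));
       cols[i] = bcol;
       (*len)++;
       return i;                                 new slot: caller zeroes the block
     }
*/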
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j that is passed in by the user is ignored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some variables required by the macros */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
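/*
   Caller-side sketch: because of the filtering above, only entries with
   block-row <= block-column should be passed in.  Assuming a global row index
   i owned by this process (illustrative only):

     PetscInt    row = i, cols[2];
     PetscScalar vals[2];
     cols[0] = i;   vals[0] = 4.0;     diagonal entry
     cols[1] = i+1; vals[1] = -1.0;    upper-triangular neighbor
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Lower-triangular entries are silently dropped when
   MAT_IGNORE_LOWER_TRIANGULAR is set, and raise PETSC_ERR_USER otherwise.
*/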
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
240 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
241 {
242   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
243   const MatScalar *value;
244   MatScalar       *barray     =baij->barray;
245   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
246   PetscErrorCode  ierr;
247   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
248   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
249   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
250 
251   PetscFunctionBegin;
252   if (!barray) {
253     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
254     baij->barray = barray;
255   }
256 
257   if (roworiented) {
258     stepval = (n-1)*bs;
259   } else {
260     stepval = (m-1)*bs;
261   }
262   for (i=0; i<m; i++) {
263     if (im[i] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
266 #endif
267     if (im[i] >= rstart && im[i] < rend) {
268       row = im[i] - rstart;
269       for (j=0; j<n; j++) {
270         if (im[i] > in[j]) {
271           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
272           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
273         }
274         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
275         if ((roworiented) && (n == 1)) {
276           barray = (MatScalar*) v + i*bs2;
277         } else if ((!roworiented) && (m == 1)) {
278           barray = (MatScalar*) v + j*bs2;
279         } else { /* Here a copy is required */
280           if (roworiented) {
281             value = v + i*(stepval+bs)*bs + j*bs;
282           } else {
283             value = v + j*(stepval+bs)*bs + i*bs;
284           }
285           for (ii=0; ii<bs; ii++,value+=stepval) {
286             for (jj=0; jj<bs; jj++) {
287               *barray++ = *value++;
288             }
289           }
290           barray -=bs2;
291         }
292 
293         if (in[j] >= cstart && in[j] < cend) {
294           col  = in[j] - cstart;
295           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
296         } else if (in[j] < 0) continue;
297 #if defined(PETSC_USE_DEBUG)
298         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
299 #endif
300         else {
301           if (mat->was_assembled) {
302             if (!baij->colmap) {
303               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
304             }
305 
306 #if defined(PETSC_USE_DEBUG)
307 #if defined(PETSC_USE_CTABLE)
308             { PetscInt data;
309               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
310               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
311             }
312 #else
313             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
314 #endif
315 #endif
316 #if defined(PETSC_USE_CTABLE)
317             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
318             col  = (col - 1)/bs;
319 #else
320             col = (baij->colmap[in[j]] - 1)/bs;
321 #endif
322             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
323               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
324               col  = in[j];
325             }
326           } else col = in[j];
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329       }
330     } else {
331       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
332       if (!baij->donotstash) {
333         if (roworiented) {
334           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
335         } else {
336           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
337         }
338       }
339     }
340   }
341   PetscFunctionReturn(0);
342 }
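/*
   Caller-side sketch for the blocked path: v holds bs*bs values per block.
   For bs = 2, inserting one block at block indices (bi,bj) with bi <= bj,
   row-oriented storage assumed (the default):

     PetscScalar v[4] = {1.0, 2.0,     first row of the 2x2 block
                         0.0, 3.0};    second row
     ierr = MatSetValuesBlocked(mat,1,&bi,1,&bj,v,INSERT_VALUES);CHKERRQ(ierr);
*/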
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatGetValues_MPISBAIJ"
346 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
347 {
348   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
349   PetscErrorCode ierr;
350   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
351   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
352 
353   PetscFunctionBegin;
354   for (i=0; i<m; i++) {
355     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
356     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
357     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
358       row = idxm[i] - bsrstart;
359       for (j=0; j<n; j++) {
360         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
361         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
362         if (idxn[j] >= bscstart && idxn[j] < bscend) {
363           col  = idxn[j] - bscstart;
364           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
365         } else {
366           if (!baij->colmap) {
367             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
368           }
369 #if defined(PETSC_USE_CTABLE)
370           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
371           data--;
372 #else
373           data = baij->colmap[idxn[j]/bs]-1;
374 #endif
375           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
376           else {
377             col  = data + idxn[j]%bs;
378             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
379           }
380         }
381       }
382     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
383   }
384   PetscFunctionReturn(0);
385 }
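/*
   Caller-side sketch: only locally owned rows may be queried (enforced by the
   PETSC_ERR_SUP above); columns may refer to any global index.  With rstart
   taken from MatGetOwnershipRange():

     PetscScalar val;
     PetscInt    row = rstart, col = 0;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/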
386 
387 #undef __FUNCT__
388 #define __FUNCT__ "MatNorm_MPISBAIJ"
389 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
390 {
391   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
392   PetscErrorCode ierr;
393   PetscReal      sum[2],*lnorm2;
394 
395   PetscFunctionBegin;
396   if (baij->size == 1) {
397     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
398   } else {
399     if (type == NORM_FROBENIUS) {
400       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
401       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
402       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
403       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
404       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
405       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
406       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
407       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
408     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
409       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
410       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
411       PetscReal    *rsum,*rsum2,vabs;
412       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
413       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
414       MatScalar    *v;
415 
416       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
417       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
418       /* Amat */
419       v = amat->a; jj = amat->j;
420       for (brow=0; brow<mbs; brow++) {
421         grow = bs*(rstart + brow);
422         nz   = amat->i[brow+1] - amat->i[brow];
423         for (bcol=0; bcol<nz; bcol++) {
424           gcol = bs*(rstart + *jj); jj++;
425           for (col=0; col<bs; col++) {
426             for (row=0; row<bs; row++) {
427               vabs            = PetscAbsScalar(*v); v++;
428               rsum[gcol+col] += vabs;
429               /* non-diagonal block */
430               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
431             }
432           }
433         }
434       }
435       /* Bmat */
436       v = bmat->a; jj = bmat->j;
437       for (brow=0; brow<mbs; brow++) {
438         grow = bs*(rstart + brow);
439         nz = bmat->i[brow+1] - bmat->i[brow];
440         for (bcol=0; bcol<nz; bcol++) {
441           gcol = bs*garray[*jj]; jj++;
442           for (col=0; col<bs; col++) {
443             for (row=0; row<bs; row++) {
444               vabs            = PetscAbsScalar(*v); v++;
445               rsum[gcol+col] += vabs;
446               rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
452       *norm = 0.0;
453       for (col=0; col<mat->cmap->N; col++) {
454         if (rsum2[col] > *norm) *norm = rsum2[col];
455       }
456       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
457     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
458   }
459   PetscFunctionReturn(0);
460 }
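/*
   Note on the Frobenius branch above: each process contributes
   lnorm2[0] = ||A_p||_F^2 (the SeqSBAIJ block, whose MatNorm() should already
   account for the implicit lower triangle) and lnorm2[1] = ||B_p||_F^2 (stored
   once, but appearing in the full symmetric matrix as both B and B^T), hence

     ||M||_F = sqrt( sum_p ||A_p||_F^2  +  2 * sum_p ||B_p||_F^2 )

   which is exactly sqrt(sum[0] + 2*sum[1]) after the Allreduce.
*/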
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
464 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
465 {
466   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
467   PetscErrorCode ierr;
468   PetscInt       nstash,reallocs;
469   InsertMode     addv;
470 
471   PetscFunctionBegin;
472   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
473 
474   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
475   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
476   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
477   mat->insertmode = addv; /* in case this processor had no cache */
478 
479   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
480   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
481   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
482   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
483   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
484   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
490 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
491 {
492   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
493   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
494   PetscErrorCode ierr;
495   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
496   PetscInt       *row,*col;
497   PetscBool      other_disassembled;
498   PetscMPIInt    n;
499   PetscBool      r1,r2,r3;
500   MatScalar      *val;
501   InsertMode     addv = mat->insertmode;
502 
503   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
504   PetscFunctionBegin;
505   if (!baij->donotstash &&  !mat->nooffprocentries) {
506     while (1) {
507       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
508       if (!flg) break;
509 
510       for (i=0; i<n;) {
511         /* Now identify the consecutive vals belonging to the same row */
512         for (j=i,rstart=row[j]; j<n; j++) {
513           if (row[j] != rstart) break;
514         }
515         if (j < n) ncols = j-i;
516         else       ncols = n-i;
517         /* Now assemble all these values with a single function call */
518         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
519         i    = j;
520       }
521     }
522     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
523     /* Now process the block-stash. Since the values are stashed column-oriented,
524        set the roworiented flags to PETSC_FALSE, and after MatSetValues()
525        restore the original flags */
526     r1 = baij->roworiented;
527     r2 = a->roworiented;
528     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
529 
530     baij->roworiented = PETSC_FALSE;
531     a->roworiented    = PETSC_FALSE;
532 
533     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
534     while (1) {
535       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
536       if (!flg) break;
537 
538       for (i=0; i<n;) {
539         /* Now identify the consecutive vals belonging to the same row */
540         for (j=i,rstart=row[j]; j<n; j++) {
541           if (row[j] != rstart) break;
542         }
543         if (j < n) ncols = j-i;
544         else       ncols = n-i;
545         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
546         i    = j;
547       }
548     }
549     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
550 
551     baij->roworiented = r1;
552     a->roworiented    = r2;
553 
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
555   }
556 
557   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
558   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
559 
560   /* determine if any processor has disassembled; if so we must
561      also disassemble ourselves, so that we may reassemble. */
562   /*
563      if nonzero structure of submatrix B cannot change then we know that
564      no processor disassembled thus we can skip this stuff
565   */
566   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
567     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
568     if (mat->was_assembled && !other_disassembled) {
569       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
570     }
571   }
572 
573   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
574     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
575   }
576   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
577   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
578 
579   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
580 
581   baij->rowvalues = 0;
582 
583   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
584   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
585     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
586     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
587   }
588   PetscFunctionReturn(0);
589 }
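/*
   Caller-side sketch of the assembly cycle these two routines implement
   (error handling trimmed):

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);    may target off-process rows
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() ships the stashed off-process entries; MatAssemblyEnd()
   drains the stash messages (the loops above), assembles A and B, and builds
   the scatter contexts on the first final assembly.
*/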
590 
591 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
592 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
593 #include <petscdraw.h>
594 #undef __FUNCT__
595 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
596 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
597 {
598   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
599   PetscErrorCode    ierr;
600   PetscInt          bs   = mat->rmap->bs;
601   PetscMPIInt       rank = baij->rank;
602   PetscBool         iascii,isdraw;
603   PetscViewer       sviewer;
604   PetscViewerFormat format;
605 
606   PetscFunctionBegin;
607   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
608   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
609   if (iascii) {
610     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
611     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
612       MatInfo info;
613       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
614       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
615       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
616       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
617       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
618       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
619       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
620       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
621       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
623       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
624       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
625       PetscFunctionReturn(0);
626     } else if (format == PETSC_VIEWER_ASCII_INFO) {
627       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
628       PetscFunctionReturn(0);
629     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
630       PetscFunctionReturn(0);
631     }
632   }
633 
634   if (isdraw) {
635     PetscDraw draw;
636     PetscBool isnull;
637     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
638     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
639   }
640 
641   {
642     /* assemble the entire matrix onto first processor. */
643     Mat          A;
644     Mat_SeqSBAIJ *Aloc;
645     Mat_SeqBAIJ  *Bloc;
646     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
647     MatScalar    *a;
648     const char   *matname;
649 
650     /* Should this be the same type as mat? */
651     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
652     if (!rank) {
653       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
654     } else {
655       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
656     }
657     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
658     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
659     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
660     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
661 
662     /* copy over the A part */
663     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
664     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
665     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
666 
667     for (i=0; i<mbs; i++) {
668       rvals[0] = bs*(baij->rstartbs + i);
669       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
670       for (j=ai[i]; j<ai[i+1]; j++) {
671         col = (baij->cstartbs+aj[j])*bs;
672         for (k=0; k<bs; k++) {
673           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
674           col++;
675           a += bs;
676         }
677       }
678     }
679     /* copy over the B part */
680     Bloc = (Mat_SeqBAIJ*)baij->B->data;
681     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
682     for (i=0; i<mbs; i++) {
683 
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = baij->garray[aj[j]]*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     ierr = PetscFree(rvals);CHKERRQ(ierr);
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698     /*
699        Everyone has to call to draw the matrix since the graphics waits are
700        synchronized across all processors that share the PetscDraw object
701     */
702     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
703     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
704     if (!rank) {
705       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
706       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
707     }
708     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
709     ierr = MatDestroy(&A);CHKERRQ(ierr);
710   }
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatView_MPISBAIJ"
716 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
717 {
718   PetscErrorCode ierr;
719   PetscBool      iascii,isdraw,issocket,isbinary;
720 
721   PetscFunctionBegin;
722   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
723   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
724   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
725   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
726   if (iascii || isdraw || issocket || isbinary) {
727     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
728   }
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatDestroy_MPISBAIJ"
734 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
735 {
736   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740 #if defined(PETSC_USE_LOG)
741   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
742 #endif
743   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
744   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
745   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
746   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
747 #if defined(PETSC_USE_CTABLE)
748   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
749 #else
750   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
751 #endif
752   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
753   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
754   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
755   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
756   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
757   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
758   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
759   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
760   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
761   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
762   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
763   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
767 #if defined(PETSC_USE_REAL_MAT_SINGLE)
768   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
769 #endif
770   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
771   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
772   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
773   ierr = PetscFree(mat->data);CHKERRQ(ierr);
774 
775   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
778   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
781 #if defined(PETSC_HAVE_ELEMENTAL)
782   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
783 #endif
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
789 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
790 {
791   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
792   PetscErrorCode ierr;
793   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
794   PetscScalar    *x,*from;
795 
796   PetscFunctionBegin;
797   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
798   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
799 
800   /* diagonal part */
801   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
802   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
803 
804   /* subdiagonal part */
805   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
806 
807   /* copy x into the vec slvec0 */
808   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
809   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
810 
811   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
812   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
813   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
814 
815   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
817   /* superdiagonal part */
818   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMult_MPISBAIJ"
824 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
825 {
826   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
827   PetscErrorCode    ierr;
828   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
829   PetscScalar       *from;
830   const PetscScalar *x;
831 
832   PetscFunctionBegin;
833   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
834   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
835 
836   /* diagonal part */
837   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
838   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
839 
840   /* subdiagonal part */
841   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
842 
843   /* copy x into the vec slvec0 */
844   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
845   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
846 
847   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
848   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
849   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
850 
851   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
852   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
853   /* superdiagonal part */
854   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
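/*
   The two multiplies above realize y = (A_loc + B + B^T) x with a single
   scatter instead of two: slvec0 packs [x ; B^T x] on each process, and the
   ADD_VALUES forward scatter simultaneously (i) delivers ghost values of x
   into slvec1b and (ii) accumulates the remote B^T x contributions into
   slvec1a, which already holds A_loc x.  The final multadd then folds in the
   superdiagonal part.  With q ranging over contributing processes:

     y_p = A_p x_p + B_p x_ghost + sum_q (B_q^T x_q)|_p
*/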
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
860 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
861 {
862   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
863   PetscErrorCode ierr;
864   PetscInt       nt;
865 
866   PetscFunctionBegin;
867   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
868   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
869 
870   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
871   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
872 
873   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874   /* do diagonal part */
875   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
876   /* do superdiagonal part */
877   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
879   /* do subdiagonal part */
880   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
881   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
882   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
888 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
889 {
890   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
891   PetscErrorCode    ierr;
892   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
893   PetscScalar       *from,zero=0.0;
894   const PetscScalar *x;
895 
896   PetscFunctionBegin;
897   /*
898   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
899   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
900   */
901   /* diagonal part */
902   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
903   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
904 
905   /* subdiagonal part */
906   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
907 
908   /* copy x into the vec slvec0 */
909   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
910   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
911   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
912   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
913 
914   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
916   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917 
918   /* superdiagonal part */
919   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
925 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
926 {
927   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
928   PetscErrorCode ierr;
929 
930   PetscFunctionBegin;
931   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932   /* do diagonal part */
933   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
934   /* do superdiagonal part */
935   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
937 
938   /* do subdiagonal part */
939   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
940   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
945 /*
946   This only works correctly for square matrices where the subblock A->A is the
947    diagonal block
948 */
949 #undef __FUNCT__
950 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
951 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
952 {
953   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
958   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatScale_MPISBAIJ"
964 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
971   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatGetRow_MPISBAIJ"
977 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
978 {
979   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
980   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
981   PetscErrorCode ierr;
982   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
983   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
984   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
985 
986   PetscFunctionBegin;
987   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
988   mat->getrowactive = PETSC_TRUE;
989 
990   if (!mat->rowvalues && (idx || v)) {
991     /*
992         allocate enough space to hold information from the longest row.
993     */
994     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
995     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
996     PetscInt     max = 1,mbs = mat->mbs,tmp;
997     for (i=0; i<mbs; i++) {
998       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
999       if (max < tmp) max = tmp;
1000     }
1001     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1002   }
1003 
1004   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1005   lrow = row - brstart;  /* local row index */
1006 
1007   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1008   if (!v)   {pvA = 0; pvB = 0;}
1009   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1010   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1011   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1012   nztot = nzA + nzB;
1013 
1014   cmap = mat->garray;
1015   if (v  || idx) {
1016     if (nztot) {
1017       /* Sort by increasing column numbers, assuming A and B already sorted */
1018       PetscInt imark = -1;
1019       if (v) {
1020         *v = v_p = mat->rowvalues;
1021         for (i=0; i<nzB; i++) {
1022           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1023           else break;
1024         }
1025         imark = i;
1026         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1027         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1028       }
1029       if (idx) {
1030         *idx = idx_p = mat->rowindices;
1031         if (imark > -1) {
1032           for (i=0; i<imark; i++) {
1033             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1034           }
1035         } else {
1036           for (i=0; i<nzB; i++) {
1037             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1038             else break;
1039           }
1040           imark = i;
1041         }
1042         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1043         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1044       }
1045     } else {
1046       if (idx) *idx = 0;
1047       if (v)   *v   = 0;
1048     }
1049   }
1050   *nz  = nztot;
1051   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1052   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1053   PetscFunctionReturn(0);
1054 }
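/*
   Caller-side sketch: for SBAIJ only the stored upper triangle is returned,
   and MatGetRow() must be bracketed by the UpperTriangular calls defined
   below (error handling trimmed):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... use cols/vals ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/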
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1058 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1059 {
1060   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1061 
1062   PetscFunctionBegin;
1063   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1064   baij->getrowactive = PETSC_FALSE;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1070 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1071 {
1072   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1073   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1074 
1075   PetscFunctionBegin;
1076   aA->getrow_utriangular = PETSC_TRUE;
1077   PetscFunctionReturn(0);
1078 }
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1081 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1082 {
1083   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1084   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1085 
1086   PetscFunctionBegin;
1087   aA->getrow_utriangular = PETSC_FALSE;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1093 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1100   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1106 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1107 {
1108   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1113   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 /* Check whether isrow is a subset of iscol_local; called by MatGetSubMatrix_MPISBAIJ()
1118    Input: isrow       - distributed (parallel),
1119           iscol_local - locally owned (seq)
1120 */
1121 #undef __FUNCT__
1122 #define __FUNCT__ "ISEqual_private"
1123 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1124 {
1125   PetscErrorCode ierr;
1126   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1127   const PetscInt *ptr1,*ptr2;
1128 
1129   PetscFunctionBegin;
1130   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1131   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1132   if (sz1 > sz2) {
1133     *flg = PETSC_FALSE;
1134     PetscFunctionReturn(0);
1135   }
1136 
1137   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1138   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1139 
1140   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1141   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1142   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1143   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1144   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1145   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1146 
1147   nmatch=0;
1148   k     = 0;
1149   for (i=0; i<sz1; i++){
1150     for (j=k; j<sz2; j++){
1151       if (a1[i] == a2[j]) {
1152         k = j; nmatch++;
1153         break;
1154       }
1155     }
1156   }
1157   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1158   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1159   ierr = PetscFree(a1);CHKERRQ(ierr);
1160   ierr = PetscFree(a2);CHKERRQ(ierr);
1161   if (nmatch < sz1) {
1162     *flg = PETSC_FALSE;
1163   } else {
1164     *flg = PETSC_TRUE;
1165   }
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1171 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1172 {
1173   PetscErrorCode ierr;
1174   IS             iscol_local;
1175   PetscInt       csize;
1176   PetscBool      isequal;
1177 
1178   PetscFunctionBegin;
1179   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1180   if (call == MAT_REUSE_MATRIX) {
1181     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1182     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1183   } else {
1184     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1185     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1186     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1187   }
1188 
1189   /* now call MatGetSubMatrix_MPIBAIJ_Private() */
1190   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1191   if (call == MAT_INITIAL_MATRIX) {
1192     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1193     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1194   }
1195   PetscFunctionReturn(0);
1196 }
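/*
   Caller-side sketch: for the symmetric format the column IS must select the
   same global indices as the row IS (checked by ISEqual_private() above):

     IS  is;
     Mat sub;
     ierr = ISCreateStride(comm,nlocal,first,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... later, same index sets, updated values ...
     ierr = MatGetSubMatrix(mat,is,is,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/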
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1200 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1201 {
1202   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1207   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1213 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1214 {
1215   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1216   Mat            A  = a->A,B = a->B;
1217   PetscErrorCode ierr;
1218   PetscReal      isend[5],irecv[5];
1219 
1220   PetscFunctionBegin;
1221   info->block_size = (PetscReal)matin->rmap->bs;
1222 
1223   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1224 
1225   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1226   isend[3] = info->memory;  isend[4] = info->mallocs;
1227 
1228   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1229 
1230   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1231   isend[3] += info->memory;  isend[4] += info->mallocs;
1232   if (flag == MAT_LOCAL) {
1233     info->nz_used      = isend[0];
1234     info->nz_allocated = isend[1];
1235     info->nz_unneeded  = isend[2];
1236     info->memory       = isend[3];
1237     info->mallocs      = isend[4];
1238   } else if (flag == MAT_GLOBAL_MAX) {
1239     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1240 
1241     info->nz_used      = irecv[0];
1242     info->nz_allocated = irecv[1];
1243     info->nz_unneeded  = irecv[2];
1244     info->memory       = irecv[3];
1245     info->mallocs      = irecv[4];
1246   } else if (flag == MAT_GLOBAL_SUM) {
1247     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1248 
1249     info->nz_used      = irecv[0];
1250     info->nz_allocated = irecv[1];
1251     info->nz_unneeded  = irecv[2];
1252     info->memory       = irecv[3];
1253     info->mallocs      = irecv[4];
1254   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1255   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1256   info->fill_ratio_needed = 0;
1257   info->factor_mallocs    = 0;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1263 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1264 {
1265   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1266   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   switch (op) {
1271   case MAT_NEW_NONZERO_LOCATIONS:
1272   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1273   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1274   case MAT_KEEP_NONZERO_PATTERN:
1275   case MAT_NEW_NONZERO_LOCATION_ERR:
1276     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1277     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1278     break;
1279   case MAT_ROW_ORIENTED:
1280     a->roworiented = flg;
1281 
1282     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1283     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1284     break;
1285   case MAT_NEW_DIAGONALS:
1286     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1287     break;
1288   case MAT_IGNORE_OFF_PROC_ENTRIES:
1289     a->donotstash = flg;
1290     break;
1291   case MAT_USE_HASH_TABLE:
1292     a->ht_flag = flg;
1293     break;
1294   case MAT_HERMITIAN:
1295     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1296     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1297 
1298     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1299     break;
1300   case MAT_SPD:
1301     A->spd_set = PETSC_TRUE;
1302     A->spd     = flg;
1303     if (flg) {
1304       A->symmetric                  = PETSC_TRUE;
1305       A->structurally_symmetric     = PETSC_TRUE;
1306       A->symmetric_set              = PETSC_TRUE;
1307       A->structurally_symmetric_set = PETSC_TRUE;
1308     }
1309     break;
1310   case MAT_SYMMETRIC:
1311     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1312     break;
1313   case MAT_STRUCTURALLY_SYMMETRIC:
1314     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1315     break;
1316   case MAT_SYMMETRY_ETERNAL:
1317     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1318     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1319     break;
1320   case MAT_IGNORE_LOWER_TRIANGULAR:
1321     aA->ignore_ltriangular = flg;
1322     break;
1323   case MAT_ERROR_LOWER_TRIANGULAR:
1324     aA->ignore_ltriangular = flg;
1325     break;
1326   case MAT_GETROW_UPPERTRIANGULAR:
1327     aA->getrow_utriangular = flg;
1328     break;
1329   default:
1330     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1331   }
1332   PetscFunctionReturn(0);
1333 }
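/*
   Caller-side sketch of the SBAIJ-specific options handled above:

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
         silently drop a(i,j) with i > j in MatSetValues()
     ierr = MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
         equivalent to bracketing MatGetRow() with MatGetRowUpperTriangular()
*/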
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1337 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1338 {
1339   PetscErrorCode ierr;
1340 
1341   PetscFunctionBegin;
1342   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1343     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1350 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1351 {
1352   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1353   Mat            a     = baij->A, b=baij->B;
1354   PetscErrorCode ierr;
1355   PetscInt       nv,m,n;
1356   PetscBool      flg;
1357 
1358   PetscFunctionBegin;
1359   if (ll != rr) {
1360     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1361     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1362   }
1363   if (!ll) PetscFunctionReturn(0);
1364 
1365   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1366   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1367 
1368   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1369   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1370 
1371   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1372 
1373   /* left diagonalscale the off-diagonal part */
1374   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1375 
1376   /* scale the diagonal part */
1377   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1378 
1379   /* right diagonalscale the off-diagonal part */
1380   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1381   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1387 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1388 {
1389   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatEqual_MPISBAIJ"
1401 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1402 {
1403   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1404   Mat            a,b,c,d;
1405   PetscBool      flg;
1406   PetscErrorCode ierr;
1407 
1408   PetscFunctionBegin;
1409   a = matA->A; b = matA->B;
1410   c = matB->A; d = matB->B;
1411 
1412   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1413   if (flg) {
1414     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1415   }
1416   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatCopy_MPISBAIJ"
1422 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1423 {
1424   PetscErrorCode ierr;
1425   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1426   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1427 
1428   PetscFunctionBegin;
1429   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1430   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1431     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1432     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1433     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1434   } else {
1435     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1436     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1443 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1444 {
1445   PetscErrorCode ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1454 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1455 {
1456   PetscErrorCode ierr;
1457   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1458   PetscBLASInt   bnz,one=1;
1459   Mat_SeqSBAIJ   *xa,*ya;
1460   Mat_SeqBAIJ    *xb,*yb;
1461 
1462   PetscFunctionBegin;
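       /* three cases: identical nonzero patterns admit a direct BLAS axpy on the stored values;
          a subset pattern falls back to MatAXPY_Basic(); otherwise build a new matrix whose
          preallocation merges both nonzero patterns and replace Y with it */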
1463   if (str == SAME_NONZERO_PATTERN) {
1464     PetscScalar alpha = a;
1465     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1466     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1467     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1468     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1469     xb   = (Mat_SeqBAIJ*)xx->B->data;
1470     yb   = (Mat_SeqBAIJ*)yy->B->data;
1471     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1472     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1473     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1474   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
1475     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1476     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1477     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1478   } else {
1479     Mat      B;
1480     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1481     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have the same block size");
1482     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1483     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1484     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1485     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1486     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1487     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1488     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1489     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1490     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1491     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1492     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1493     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1494     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1495     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1496     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1497     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1498     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1499     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1506 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1507 {
1508   PetscErrorCode ierr;
1509   PetscInt       i;
1510   PetscBool      flg,sorted;
1511 
1512   PetscFunctionBegin;
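       /* the underlying MatGetSubMatrices_MPIBAIJ() extraction requires sorted index sets */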
1513   for (i = 0; i < n; i++) {
1514     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1515     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %D not sorted",i);
1516     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1517     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %D not sorted",i);
1518   }
1519   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1520   for (i=0; i<n; i++) {
1521     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1522     if (!flg) { /* *B[i] is non-symmetric, set flag */
1523       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1524     }
1525   }
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 /* -------------------------------------------------------------------*/
1530 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1531                                        MatGetRow_MPISBAIJ,
1532                                        MatRestoreRow_MPISBAIJ,
1533                                        MatMult_MPISBAIJ,
1534                                /*  4*/ MatMultAdd_MPISBAIJ,
1535                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1536                                        MatMultAdd_MPISBAIJ,
1537                                        0,
1538                                        0,
1539                                        0,
1540                                /* 10*/ 0,
1541                                        0,
1542                                        0,
1543                                        MatSOR_MPISBAIJ,
1544                                        MatTranspose_MPISBAIJ,
1545                                /* 15*/ MatGetInfo_MPISBAIJ,
1546                                        MatEqual_MPISBAIJ,
1547                                        MatGetDiagonal_MPISBAIJ,
1548                                        MatDiagonalScale_MPISBAIJ,
1549                                        MatNorm_MPISBAIJ,
1550                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1551                                        MatAssemblyEnd_MPISBAIJ,
1552                                        MatSetOption_MPISBAIJ,
1553                                        MatZeroEntries_MPISBAIJ,
1554                                /* 24*/ 0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /* 29*/ MatSetUp_MPISBAIJ,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /* 34*/ MatDuplicate_MPISBAIJ,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                /* 39*/ MatAXPY_MPISBAIJ,
1570                                        MatGetSubMatrices_MPISBAIJ,
1571                                        MatIncreaseOverlap_MPISBAIJ,
1572                                        MatGetValues_MPISBAIJ,
1573                                        MatCopy_MPISBAIJ,
1574                                /* 44*/ 0,
1575                                        MatScale_MPISBAIJ,
1576                                        0,
1577                                        0,
1578                                        0,
1579                                /* 49*/ 0,
1580                                        0,
1581                                        0,
1582                                        0,
1583                                        0,
1584                                /* 54*/ 0,
1585                                        0,
1586                                        MatSetUnfactored_MPISBAIJ,
1587                                        0,
1588                                        MatSetValuesBlocked_MPISBAIJ,
1589                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1590                                        0,
1591                                        0,
1592                                        0,
1593                                        0,
1594                                /* 64*/ 0,
1595                                        0,
1596                                        0,
1597                                        0,
1598                                        0,
1599                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1600                                        0,
1601                                        0,
1602                                        0,
1603                                        0,
1604                                /* 74*/ 0,
1605                                        0,
1606                                        0,
1607                                        0,
1608                                        0,
1609                                /* 79*/ 0,
1610                                        0,
1611                                        0,
1612                                        0,
1613                                        MatLoad_MPISBAIJ,
1614                                /* 84*/ 0,
1615                                        0,
1616                                        0,
1617                                        0,
1618                                        0,
1619                                /* 89*/ 0,
1620                                        0,
1621                                        0,
1622                                        0,
1623                                        0,
1624                                /* 94*/ 0,
1625                                        0,
1626                                        0,
1627                                        0,
1628                                        0,
1629                                /* 99*/ 0,
1630                                        0,
1631                                        0,
1632                                        0,
1633                                        0,
1634                                /*104*/ 0,
1635                                        MatRealPart_MPISBAIJ,
1636                                        MatImaginaryPart_MPISBAIJ,
1637                                        MatGetRowUpperTriangular_MPISBAIJ,
1638                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1639                                /*109*/ 0,
1640                                        0,
1641                                        0,
1642                                        0,
1643                                        0,
1644                                /*114*/ 0,
1645                                        0,
1646                                        0,
1647                                        0,
1648                                        0,
1649                                /*119*/ 0,
1650                                        0,
1651                                        0,
1652                                        0,
1653                                        0,
1654                                /*124*/ 0,
1655                                        0,
1656                                        0,
1657                                        0,
1658                                        0,
1659                                /*129*/ 0,
1660                                        0,
1661                                        0,
1662                                        0,
1663                                        0,
1664                                /*134*/ 0,
1665                                        0,
1666                                        0,
1667                                        0,
1668                                        0,
1669                                /*139*/ 0,
1670                                        0,
1671                                        0,
1672                                        0,
1673                                        0,
1674                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1675 };
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1679 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1680 {
1681   PetscFunctionBegin;
1682   *a = ((Mat_MPISBAIJ*)A->data)->A;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1688 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1689 {
1690   Mat_MPISBAIJ   *b;
1691   PetscErrorCode ierr;
1692   PetscInt       i,mbs,Mbs;
1693 
1694   PetscFunctionBegin;
1695   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1696   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1697   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1698   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1699 
1700   b   = (Mat_MPISBAIJ*)B->data;
1701   mbs = B->rmap->n/bs;
1702   Mbs = B->rmap->N/bs;
1703   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1704 
1705   B->rmap->bs = bs;
1706   b->bs2      = bs*bs;
1707   b->mbs      = mbs;
1708   b->Mbs      = Mbs;
1709   b->nbs      = B->cmap->n/bs;
1710   b->Nbs      = B->cmap->N/bs;
1711 
1712   for (i=0; i<=b->size; i++) {
1713     b->rangebs[i] = B->rmap->range[i]/bs;
1714   }
1715   b->rstartbs = B->rmap->rstart/bs;
1716   b->rendbs   = B->rmap->rend/bs;
1717 
1718   b->cstartbs = B->cmap->rstart/bs;
1719   b->cendbs   = B->cmap->rend/bs;
1720 
1721   if (!B->preallocated) {
1722     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1723     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1724     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1725     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1726     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1727     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1728     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1729     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1730     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1731   }
1732 
1733   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1734   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1735 
1736   B->preallocated = PETSC_TRUE;
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1742 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1743 {
1744   PetscInt       m,rstart,cstart,cend;
1745   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1746   const PetscInt *JJ    =0;
1747   PetscScalar    *values=0;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1752   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1753   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1754   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1755   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1756   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1757   m      = B->rmap->n/bs;
1758   rstart = B->rmap->rstart/bs;
1759   cstart = B->cmap->rstart/bs;
1760   cend   = B->cmap->rend/bs;
1761 
1762   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1763   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1764   for (i=0; i<m; i++) {
1765     nz = ii[i+1] - ii[i];
1766     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1767     nz_max = PetscMax(nz_max,nz);
1768     JJ     = jj + ii[i];
1769     for (j=0; j<nz; j++) {
1770       if (*JJ >= cstart) break;
1771       JJ++;
1772     }
1773     d = 0;
1774     for (; j<nz; j++) {
1775       if (*JJ++ >= cend) break;
1776       d++;
1777     }
1778     d_nnz[i] = d;
1779     o_nnz[i] = nz - d;
1780   }
1781   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1782   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1783 
1784   values = (PetscScalar*)V;
1785   if (!values) {
1786     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1787     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1788   }
1789   for (i=0; i<m; i++) {
1790     PetscInt          row    = i + rstart;
1791     PetscInt          ncols  = ii[i+1] - ii[i];
1792     const PetscInt    *icols = jj + ii[i];
1793     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1794     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1795   }
1796 
1797   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1798   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1799   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1800   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 /*MC
1805    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1806    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1807    the matrix is stored.
1808 
1809   For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it
1810   Hermitian call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
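
       For example, a minimal creation sketch (M, N, bs, d_nz, and o_nz are illustrative placeholders):

     .vb
       Mat A;
       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
       MatSetType(A,MATMPISBAIJ);
       MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
     .ve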
1811 
1812    Options Database Keys:
1813 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1814 
1815   Level: beginner
1816 
1817 .seealso: MatCreateMPISBAIJ
1818 M*/
1819 
1820 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatCreate_MPISBAIJ"
1824 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1825 {
1826   Mat_MPISBAIJ   *b;
1827   PetscErrorCode ierr;
1828   PetscBool      flg = PETSC_FALSE;
1829 
1830   PetscFunctionBegin;
1831   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1832   B->data = (void*)b;
1833   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1834 
1835   B->ops->destroy = MatDestroy_MPISBAIJ;
1836   B->ops->view    = MatView_MPISBAIJ;
1837   B->assembled    = PETSC_FALSE;
1838   B->insertmode   = NOT_SET_VALUES;
1839 
1840   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1841   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1842 
1843   /* build local table of row and column ownerships */
1844   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1845 
1846   /* build cache for off array entries formed */
1847   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1848 
1849   b->donotstash  = PETSC_FALSE;
1850   b->colmap      = NULL;
1851   b->garray      = NULL;
1852   b->roworiented = PETSC_TRUE;
1853 
1854   /* stuff used in block assembly */
1855   b->barray = 0;
1856 
1857   /* stuff used for matrix vector multiply */
1858   b->lvec    = 0;
1859   b->Mvctx   = 0;
1860   b->slvec0  = 0;
1861   b->slvec0b = 0;
1862   b->slvec1  = 0;
1863   b->slvec1a = 0;
1864   b->slvec1b = 0;
1865   b->sMvctx  = 0;
1866 
1867   /* stuff for MatGetRow() */
1868   b->rowindices   = 0;
1869   b->rowvalues    = 0;
1870   b->getrowactive = PETSC_FALSE;
1871 
1872   /* hash table stuff */
1873   b->ht           = 0;
1874   b->hd           = 0;
1875   b->ht_size      = 0;
1876   b->ht_flag      = PETSC_FALSE;
1877   b->ht_fact      = 0;
1878   b->ht_total_ct  = 0;
1879   b->ht_insert_ct = 0;
1880 
1881   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1882   b->ijonly = PETSC_FALSE;
1883 
1884   b->in_loc = 0;
1885   b->v_loc  = 0;
1886   b->n_loc  = 0;
1887 
1888   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1890   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1891   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1894 #if defined(PETSC_HAVE_ELEMENTAL)
1895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1896 #endif
1897 
1898   B->symmetric                  = PETSC_TRUE;
1899   B->structurally_symmetric     = PETSC_TRUE;
1900   B->symmetric_set              = PETSC_TRUE;
1901   B->structurally_symmetric_set = PETSC_TRUE;
1902 
1903   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1904   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1905   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1906   if (flg) {
1907     PetscReal fact = 1.39;
1908     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1909     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1910     if (fact <= 1.0) fact = 1.39;
1911     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1912     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1913   }
1914   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 /*MC
1919    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1920 
1921    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1922    and MATMPISBAIJ otherwise.
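
        A typical usage sketch that lets the type be chosen from the options database:

     .vb
        MatCreate(PETSC_COMM_WORLD,&A);
        MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
        MatSetFromOptions(A);    /* e.g. run the program with -mat_type sbaij */
     .ve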
1923 
1924    Options Database Keys:
1925 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1926 
1927   Level: beginner
1928 
1929 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1930 M*/
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1934 /*@C
1935    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1936    the user should preallocate the matrix storage by setting the parameters
1937    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1938    performance can be increased by more than a factor of 50.
1939 
1940    Collective on Mat
1941 
1942    Input Parameters:
1943 +  B - the matrix
1944 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1945           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1946 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1947            submatrix  (same for all local rows)
1948 .  d_nnz - array containing the number of block nonzeros in the various block rows
1949            in the upper triangular and diagonal part of the diagonal portion of the local
1950            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix
1951            you must leave room for the diagonal entry and set a value even if it is zero.
1952 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1953            submatrix (same for all local rows).
1954 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1955            off-diagonal portion of the local submatrix that is right of the diagonal
1956            (possibly different for each block row) or NULL.
1957 
1958 
1959    Options Database Keys:
1960 +   -mat_no_unroll - uses code that does not unroll the loops in the
1961                      block calculations (much slower)
1962 -   -mat_block_size - size of the blocks to use
1963 
1964    Notes:
1965 
1966    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1967    then it must be used on all processors that share the object for that argument.
1968 
1969    If the *_nnz parameter is given then the *_nz parameter is ignored
1970 
1971    Storage Information:
1972    For a square global matrix we define each processor's diagonal portion
1973    to be its local rows and the corresponding columns (a square submatrix);
1974    each processor's off-diagonal portion encompasses the remainder of the
1975    local matrix (a rectangular submatrix).
1976 
1977    The user can specify preallocated storage for the diagonal part of
1978    the local submatrix with either d_nz or d_nnz (not both).  Set
1979    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1980    memory allocation.  Likewise, specify preallocated storage for the
1981    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1982 
1983    You can call MatGetInfo() to get information on how effective the preallocation was;
1984    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1985    You can also run with the option -info and look for messages with the string
1986    malloc in them to see if additional memory allocation was needed.
1987 
1988    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1989    the figure below we depict these three local rows and all columns (0-11).
1990 
1991 .vb
1992            0 1 2 3 4 5 6 7 8 9 10 11
1993           --------------------------
1994    row 3  |. . . d d d o o o o  o  o
1995    row 4  |. . . d d d o o o o  o  o
1996    row 5  |. . . d d d o o o o  o  o
1997           --------------------------
1998 .ve
1999 
2000    Thus, any entries in the d locations are stored in the d (diagonal)
2001    submatrix, and any entries in the o locations are stored in the
2002    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2003    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2004 
2005    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2006    plus the diagonal part of the d matrix,
2007    and o_nz should indicate the number of block nonzeros per row in the o matrix
2008 
2009    In general, for PDE problems in which most nonzeros are near the diagonal,
2010    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2011    or you will get TERRIBLE performance; see the users' manual chapter on
2012    matrices.
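
        For the rows depicted above with bs=3, the three rows form one block row containing
        one diagonal block (columns 3-5) and two off-diagonal blocks (columns 6-8 and 9-11),
        so a matching preallocation call (a sketch, not the only valid choice) is

     .vb
        MatMPISBAIJSetPreallocation(B,3,1,NULL,2,NULL);
     .ve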
2013 
2014    Level: intermediate
2015 
2016 .keywords: matrix, block, aij, compressed row, sparse, parallel
2017 
2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2019 @*/
2020 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2021 {
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2026   PetscValidType(B,1);
2027   PetscValidLogicalCollectiveInt(B,bs,2);
2028   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatCreateSBAIJ"
2034 /*@C
2035    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2036    (block compressed row).  For good matrix assembly performance
2037    the user should preallocate the matrix storage by setting the parameters
2038    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2039    performance can be increased by more than a factor of 50.
2040 
2041    Collective on MPI_Comm
2042 
2043    Input Parameters:
2044 +  comm - MPI communicator
2045 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2046           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2047 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2048            This value should be the same as the local size used in creating the
2049            y vector for the matrix-vector product y = Ax.
2050 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2051            This value should be the same as the local size used in creating the
2052            x vector for the matrix-vector product y = Ax.
2053 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2054 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2055 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2056            submatrix  (same for all local rows)
2057 .  d_nnz - array containing the number of block nonzeros in the various block rows
2058            in the upper triangular and diagonal part of the diagonal portion of the local
2059            submatrix (possibly different for each block row) or NULL.
2060            If you plan to factor the matrix you must leave room for the diagonal entry and
2061            set its value even if it is zero.
2062 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2063            submatrix (same for all local rows).
2064 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2065            off-diagonal portion of the local submatrix (possibly different for
2066            each block row) or NULL.
2067 
2068    Output Parameter:
2069 .  A - the matrix
2070 
2071    Options Database Keys:
2072 +   -mat_no_unroll - uses code that does not unroll the loops in the
2073                      block calculations (much slower)
2074 .   -mat_block_size - size of the blocks to use
2075 -   -mat_mpi - use the parallel matrix data structures even on one processor
2076                (defaults to using SeqBAIJ format on one processor)
2077 
2078    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2079    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2080    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2081 
2082    Notes:
2083    The number of rows and columns must be divisible by blocksize.
2084    This matrix type does not support complex Hermitian operations.
2085 
2086    The user MUST specify either the local or global matrix dimensions
2087    (possibly both).
2088 
2089    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2090    then it must be used on all processors that share the object for that argument.
2091 
2092    If the *_nnz parameter is given then the *_nz parameter is ignored
2093 
2094    Storage Information:
2095    For a square global matrix we define each processor's diagonal portion
2096    to be its local rows and the corresponding columns (a square submatrix);
2097    each processor's off-diagonal portion encompasses the remainder of the
2098    local matrix (a rectangular submatrix).
2099 
2100    The user can specify preallocated storage for the diagonal part of
2101    the local submatrix with either d_nz or d_nnz (not both).  Set
2102    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2103    memory allocation.  Likewise, specify preallocated storage for the
2104    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2105 
2106    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2107    the figure below we depict these three local rows and all columns (0-11).
2108 
2109 .vb
2110            0 1 2 3 4 5 6 7 8 9 10 11
2111           --------------------------
2112    row 3  |. . . d d d o o o o  o  o
2113    row 4  |. . . d d d o o o o  o  o
2114    row 5  |. . . d d d o o o o  o  o
2115           --------------------------
2116 .ve
2117 
2118    Thus, any entries in the d locations are stored in the d (diagonal)
2119    submatrix, and any entries in the o locations are stored in the
2120    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2121    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2122 
2123    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2124    plus the diagonal part of the d matrix,
2125    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2126    In general, for PDE problems in which most nonzeros are near the diagonal,
2127    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2128    or you will get TERRIBLE performance; see the users' manual chapter on
2129    matrices.
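
        A creation sketch (the global size 300 and block size 3 are illustrative assumptions):

     .vb
        Mat A;
        MatCreateSBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,300,300,1,NULL,2,NULL,&A);
     .ve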
2130 
2131    Level: intermediate
2132 
2133 .keywords: matrix, block, aij, compressed row, sparse, parallel
2134 
2135 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2136 @*/
2137 
2138 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2139 {
2140   PetscErrorCode ierr;
2141   PetscMPIInt    size;
2142 
2143   PetscFunctionBegin;
2144   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2145   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2147   if (size > 1) {
2148     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2149     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2150   } else {
2151     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2152     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2153   }
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2160 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2161 {
2162   Mat            mat;
2163   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2164   PetscErrorCode ierr;
2165   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2166   PetscScalar    *array;
2167 
2168   PetscFunctionBegin;
2169   *newmat = 0;
2170 
2171   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2172   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2173   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2174   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2175   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2176   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2177 
2178   mat->factortype   = matin->factortype;
2179   mat->preallocated = PETSC_TRUE;
2180   mat->assembled    = PETSC_TRUE;
2181   mat->insertmode   = NOT_SET_VALUES;
2182 
2183   a      = (Mat_MPISBAIJ*)mat->data;
2184   a->bs2 = oldmat->bs2;
2185   a->mbs = oldmat->mbs;
2186   a->nbs = oldmat->nbs;
2187   a->Mbs = oldmat->Mbs;
2188   a->Nbs = oldmat->Nbs;
2189 
2190 
2191   a->size         = oldmat->size;
2192   a->rank         = oldmat->rank;
2193   a->donotstash   = oldmat->donotstash;
2194   a->roworiented  = oldmat->roworiented;
2195   a->rowindices   = 0;
2196   a->rowvalues    = 0;
2197   a->getrowactive = PETSC_FALSE;
2198   a->barray       = 0;
2199   a->rstartbs     = oldmat->rstartbs;
2200   a->rendbs       = oldmat->rendbs;
2201   a->cstartbs     = oldmat->cstartbs;
2202   a->cendbs       = oldmat->cendbs;
2203 
2204   /* hash table stuff */
2205   a->ht           = 0;
2206   a->hd           = 0;
2207   a->ht_size      = 0;
2208   a->ht_flag      = oldmat->ht_flag;
2209   a->ht_fact      = oldmat->ht_fact;
2210   a->ht_total_ct  = 0;
2211   a->ht_insert_ct = 0;
2212 
2213   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2214   if (oldmat->colmap) {
2215 #if defined(PETSC_USE_CTABLE)
2216     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2217 #else
2218     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2219     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2220     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2221 #endif
2222   } else a->colmap = 0;
2223 
2224   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2225     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2226     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2227     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2228   } else a->garray = 0;
2229 
2230   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2231   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2232   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2233   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2234   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2235 
2236   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2237   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2238   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2239   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2240 
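       /* slvec1 stores the local diagonal part (bs*mbs entries) followed by the off-process
          part; recreate the sub-vector views slvec1a/slvec1b (and slvec0b for the tail of
          slvec0) that alias those segments */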
2241   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2242   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2243   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2244   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2245   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2246   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2247   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2248   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2249   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2250   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2251   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2252   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2253   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2254 
2255   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2256   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2257   a->sMvctx = oldmat->sMvctx;
2258   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2259 
2260   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2261   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2262   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2263   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2264   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2265   *newmat = mat;
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatLoad_MPISBAIJ"
2271 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2272 {
2273   PetscErrorCode ierr;
2274   PetscInt       i,nz,j,rstart,rend;
2275   PetscScalar    *vals,*buf;
2276   MPI_Comm       comm;
2277   MPI_Status     status;
2278   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2279   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2280   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2281   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2282   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2283   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2284   int            fd;
2285 
2286   PetscFunctionBegin;
2287   /* force binary viewer to load .info file if it has not yet done so */
2288   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2289   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2290   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2291   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2292   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2293   if (bs < 0) bs = 1;
2294 
2295   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2296   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2297   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2298   if (!rank) {
2299     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2300     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2301     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2302   }
2303 
2304   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2305 
2306   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2307   M    = header[1];
2308   N    = header[2];
2309 
2310   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2311   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2312   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2313 
2314   /* If global sizes are set, check if they are consistent with that given in the file */
2315   if (sizesset) {
2316     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2317   }
2318   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2319   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2320 
2321   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2322 
2323   /*
2324      This code adds extra rows to make sure the number of rows is
2325      divisible by the blocksize
2326   */
2327   Mbs        = M/bs;
2328   extra_rows = bs - M + bs*(Mbs);
2329   if (extra_rows == bs) extra_rows = 0;
2330   else                  Mbs++;
2331   if (extra_rows && !rank) {
2332     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2333   }
2334 
2335   /* determine ownership of all rows */
2336   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2337     mbs = Mbs/size + ((Mbs % size) > rank);
2338     m   = mbs*bs;
2339   } else { /* User Set */
2340     m   = newmat->rmap->n;
2341     mbs = m/bs;
2342   }
2343   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2344   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2345   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2346   rowners[0] = 0;
2347   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2348   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2349   rstart = rowners[rank];
2350   rend   = rowners[rank+1];
2351 
2352   /* distribute row lengths to all processors */
2353   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2354   if (!rank) {
2355     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2356     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2357     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2358     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2359     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2360     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2361     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2362   } else {
2363     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2364   }
2365 
2366   if (!rank) {   /* procs[0] */
2367     /* calculate the number of nonzeros on each processor */
2368     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2369     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2370     for (i=0; i<size; i++) {
2371       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2372         procsnz[i] += rowlengths[j];
2373       }
2374     }
2375     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2376 
2377     /* determine max buffer needed and allocate it */
2378     maxnz = 0;
2379     for (i=0; i<size; i++) {
2380       maxnz = PetscMax(maxnz,procsnz[i]);
2381     }
2382     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2383 
2384     /* read in my part of the matrix column indices  */
2385     nz     = procsnz[0];
2386     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2387     mycols = ibuf;
2388     if (size == 1) nz -= extra_rows;
2389     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2390     if (size == 1) {
2391       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2392     }
2393 
2394     /* read in the column indices of all other processes (except the last) and ship them off */
2395     for (i=1; i<size-1; i++) {
2396       nz   = procsnz[i];
2397       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2398       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2399     }
2400     /* read in the stuff for the last proc */
2401     if (size != 1) {
2402       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2403       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2404       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2405       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2406     }
2407     ierr = PetscFree(cols);CHKERRQ(ierr);
2408   } else {  /* procs[i], i>0 */
2409     /* determine buffer space needed for message */
2410     nz = 0;
2411     for (i=0; i<m; i++) nz += locrowlens[i];
2412     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2413     mycols = ibuf;
2414     /* receive message of column indices*/
2415     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2416     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2417     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2418   }
2419 
2420   /* loop over local rows, determining number of off diagonal entries */
2421   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2422   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2423   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2424   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2425   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2426   rowcount = 0;
2427   nzcount  = 0;
2428   for (i=0; i<mbs; i++) {
2429     dcount  = 0;
2430     odcount = 0;
2431     for (j=0; j<bs; j++) {
2432       kmax = locrowlens[rowcount];
2433       for (k=0; k<kmax; k++) {
2434         tmp = mycols[nzcount++]/bs; /* block col. index */
2435         if (!mask[tmp]) {
2436           mask[tmp] = 1;
2437           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2438           else masked1[dcount++] = tmp; /* entry in diag portion */
2439         }
2440       }
2441       rowcount++;
2442     }
2443 
2444     dlens[i]  = dcount;  /* d_nzz[i] */
2445     odlens[i] = odcount; /* o_nzz[i] */
2446 
2447     /* zero out the mask elements we set */
2448     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2449     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2450   }
2451   if (!sizesset) {
2452     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2453   }
2454   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2455   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2456 
2457   if (!rank) {
2458     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2459     /* read in my part of the matrix numerical values  */
2460     nz     = procsnz[0];
2461     vals   = buf;
2462     mycols = ibuf;
2463     if (size == 1) nz -= extra_rows;
2464     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2465     if (size == 1) {
2466       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2467     }
2468 
2469     /* insert into matrix */
2470     jj = rstart*bs;
2471     for (i=0; i<m; i++) {
2472       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2473       mycols += locrowlens[i];
2474       vals   += locrowlens[i];
2475       jj++;
2476     }
2477 
2478     /* read in other processors (except the last one) and ship out */
2479     for (i=1; i<size-1; i++) {
2480       nz   = procsnz[i];
2481       vals = buf;
2482       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2483       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2484     }
2485     /* the last proc */
2486     if (size != 1) {
2487       nz   = procsnz[size-1] - extra_rows; /* i == size-1 after the loop; the extra rows are not on disk */
2488       vals = buf;
2489       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2490       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2491       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2492     }
2493     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2494 
2495   } else {
2496     /* receive numeric values */
2497     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2498 
2499     /* receive message of values*/
2500     vals   = buf;
2501     mycols = ibuf;
2502     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2503     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2504     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2505 
2506     /* insert into matrix */
2507     jj = rstart*bs;
2508     for (i=0; i<m; i++) {
2509       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2510       mycols += locrowlens[i];
2511       vals   += locrowlens[i];
2512       jj++;
2513     }
2514   }
2515 
2516   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2517   ierr = PetscFree(buf);CHKERRQ(ierr);
2518   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2519   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2520   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2521   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2522   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2523   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2529 /*XXXXX@
2530    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2531 
2532    Input Parameters:
2533 .  mat  - the matrix
2534 .  fact - factor
2535 
2536    Not Collective on Mat, each process can have a different hash factor
2537 
2538    Level: advanced
2539 
2540   Notes:
2541    This can also be set by the command line option: -mat_use_hash_table fact
2542 
2543 .keywords: matrix, hashtable, factor, HT
2544 
2545 .seealso: MatSetOption()
2546 @XXXXX*/
2547 
2548 
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2551 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2552 {
2553   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2554   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2555   PetscReal      atmp;
2556   PetscReal      *work,*svalues,*rvalues;
2557   PetscErrorCode ierr;
2558   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2559   PetscMPIInt    rank,size;
2560   PetscInt       *rowners_bs,dest,count,source;
2561   PetscScalar    *va;
2562   MatScalar      *ba;
2563   MPI_Status     stat;
2564 
2565   PetscFunctionBegin;
2566   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning the column index of the maximum entry is not yet supported; send email to petsc-maint@mcs.anl.gov");
2567   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2568   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2569 
2570   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2571   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2572 
2573   bs  = A->rmap->bs;
2574   mbs = a->mbs;
2575   Mbs = a->Mbs;
2576   ba  = b->a;
2577   bi  = b->i;
2578   bj  = b->j;
2579 
2580   /* find ownerships */
2581   rowners_bs = A->rmap->range;
2582 
2583   /* each proc creates an array to be distributed */
2584   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2585   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2586 
2587   /* row_max for B */
2588   if (rank != size-1) {
2589     for (i=0; i<mbs; i++) {
2590       ncols = bi[1] - bi[0]; bi++;
2591       brow  = bs*i;
2592       for (j=0; j<ncols; j++) {
2593         bcol = bs*(*bj);
2594         for (kcol=0; kcol<bs; kcol++) {
2595           col  = bcol + kcol;                /* local col index */
2596           col += rowners_bs[rank+1];      /* global col index */
2597           for (krow=0; krow<bs; krow++) {
2598             atmp = PetscAbsScalar(*ba); ba++;
2599             row  = brow + krow;   /* local row index */
2600             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2601             if (work[col] < atmp) work[col] = atmp;
2602           }
2603         }
2604         bj++;
2605       }
2606     }
2607 
2608     /* send values to its owners */
2609     for (dest=rank+1; dest<size; dest++) {
2610       svalues = work + rowners_bs[dest];
2611       count   = rowners_bs[dest+1]-rowners_bs[dest];
2612       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2613     }
2614   }
2615 
2616   /* receive values */
2617   if (rank) {
2618     rvalues = work;
2619     count   = rowners_bs[rank+1]-rowners_bs[rank];
2620     for (source=0; source<rank; source++) {
2621       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2622       /* process values */
2623       for (i=0; i<count; i++) {
2624         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2625       }
2626     }
2627   }
2628 
2629   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2630   ierr = PetscFree(work);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatSOR_MPISBAIJ"
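/*
   SSOR for the parallel symmetric format: only the upper triangular half of the off-process
   coupling is stored in B, so each global symmetric sweep first assembles
   bb1 = bb - B^T*xx - B*xx through the merged slvec0/slvec1 scatter and then applies a local
   symmetric sweep with the diagonal block A.
*/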
2636 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2637 {
2638   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2639   PetscErrorCode    ierr;
2640   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2641   PetscScalar       *x,*ptr,*from;
2642   Vec               bb1;
2643   const PetscScalar *b;
2644 
2645   PetscFunctionBegin;
2646   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2647   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2648 
2649   if (flag == SOR_APPLY_UPPER) {
2650     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2651     PetscFunctionReturn(0);
2652   }
2653 
2654   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2655     if (flag & SOR_ZERO_INITIAL_GUESS) {
2656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2657       its--;
2658     }
2659 
2660     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2661     while (its--) {
2662 
2663       /* lower triangular part: slvec0b = B^T*xx (slvec0 is negated as a whole below) */
2664       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2665 
2666       /* copy xx into slvec0a */
2667       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2668       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2669       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2670       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2671 
2672       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2673 
2674       /* copy bb into slvec1a */
2675       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2676       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2677       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2678       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2679 
2680       /* set slvec1b = 0 */
2681       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2682 
2683       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2684       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2685       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2686       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687 
2688       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus both halves of the off-diagonal coupling applied to xx */
2689       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2690 
2691       /* local diagonal sweep */
2692       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2693     }
2694     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2695   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2696     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2697   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2698     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699   } else if (flag & SOR_EISENSTAT) {
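    /* Eisenstat's trick: obtain the SSOR action from one backward and one forward triangular
       sweep plus a diagonal correction, instead of a full symmetric sweep per iteration */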
2700     Vec               xx1;
2701     PetscBool         hasop;
2702     const PetscScalar *diag;
2703     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2704     PetscInt          i,n;
2705 
2706     if (!mat->xx1) {
2707       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2708       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2709     }
2710     xx1 = mat->xx1;
2711     bb1 = mat->bb1;
2712 
2713     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2714 
2715     if (!mat->diag) {
2716       /* note: the cached diagonal becomes stale if the same matrix is given new nonzero values */
2717       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2718       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2719     }
2720     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2721 
2722     if (hasop) {
2723       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2724       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2725     } else {
2726       /*
2727           The inlined loop below replaces these two calls; it may be a bit faster with a good compiler:
2728       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2729       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2730       */
2731       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2732       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2733       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2734       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2735       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2736       if (omega == 1.0) {
2737         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2738         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2739       } else {
2740         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2741         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2742       }
2743       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2744       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2745       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2746       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2747     }
2748 
2749     /* multiply off-diagonal portion of matrix */
2750     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2751     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2752     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2753     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2754     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2755     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2756     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2757     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2758     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2759     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2760 
2761     /* local sweep */
2762     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2763     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2764   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
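/* Variant of MatSOR_MPISBAIJ() that uses the standard Mvctx scatter (a forward scatter for
   the upper triangular contribution and a reverse scatter for the transposed one) instead of
   the merged slvec0/slvec1 scatter. */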
2770 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2771 {
2772   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2773   PetscErrorCode ierr;
2774   Vec            lvec1,bb1;
2775 
2776   PetscFunctionBegin;
2777   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2778   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2779 
2780   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2781     if (flag & SOR_ZERO_INITIAL_GUESS) {
2782       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2783       its--;
2784     }
2785 
2786     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2787     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2788     while (its--) {
2789       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2790 
2791       /* lower triangular part: bb1 = bb - B^T*xx */
2792       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2793       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2794 
2795       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2796       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2797       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2798 
2799       /* upper triangular part: bb1 = bb1 - B*xx */
2800       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2801       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2802 
2803       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2804 
2805       /* diagonal sweep */
2806       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2807     }
2808     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2809     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2810   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2816 /*@
2817      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2818          rows in standard CSR format.
2819 
2820    Collective on MPI_Comm
2821 
2822    Input Parameters:
2823 +  comm - MPI communicator
2824 .  bs - the block size, only a block size of 1 is supported
2825 .  m - number of local rows (cannot be PETSC_DECIDE)
2826 .  n - number of local columns; this should be the same as the local size used in creating the
2827        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2828        calculated if N is given). For square matrices n is almost always m.
2829 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2830 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2831 .   i - row indices
2832 .   j - column indices
2833 -   a - matrix values
2834 
2835    Output Parameter:
2836 .   mat - the matrix
2837 
2838    Level: intermediate
2839 
2840    Notes:
2841        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2842      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2843      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2844 
2845        The i and j indices are 0 based, and the i indices are offsets into the local j array.
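
       Example: a sketch with illustrative values. A 4x4 symmetric tridiagonal matrix with bs=1
     on two processes, each owning two rows and passing only the upper triangular entries of
     those rows (SBAIJ stores just the upper triangle),
.vb
   proc 0:  i[] = {0,2,4};  j[] = {0,1,1,2};  a[] = {2.0,-1.0,2.0,-1.0};
   proc 1:  i[] = {0,2,3};  j[] = {2,3,3};    a[] = {2.0,-1.0,2.0};
   both:    MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i,j,a,&A);
.ve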
2846 
2847 .keywords: matrix, aij, compressed row, sparse, parallel
2848 
2849 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2850           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2851 @*/
2852 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2853 {
2854   PetscErrorCode ierr;
2855 
2856 
2857   PetscFunctionBegin;
2858   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2859   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2860   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2861   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2862   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2863   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2870 /*@C
2871    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ
2872    (symmetric block) format, given a CSR description of the local rows.
2873 
2874    Collective on MPI_Comm
2875 
2876    Input Parameters:
2877 +  B - the matrix
2878 .  bs - the block size
2879 .  i - the indices into j for the start of each local row (starts with zero)
2880 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2881 -  v - optional values in the matrix
2882 
2883    Level: developer
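
   Example (a sketch of the typical call sequence, with placeholder arguments; mirrors
   MatCreateMPISBAIJWithArrays() above):
.vb
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPISBAIJ);
   MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);
.ve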
2884 
2885 .keywords: matrix, aij, compressed row, sparse, parallel
2886 
2887 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2888 @*/
2889 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2890 {
2891   PetscErrorCode ierr;
2892 
2893   PetscFunctionBegin;
2894   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 #undef __FUNCT__
2899 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
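/*
   Concatenates the rows of the per-process sequential SBAIJ matrices inmat into a single
   parallel MPISBAIJ matrix: the symbolic phase (MAT_INITIAL_MATRIX) derives the block
   preallocation from the rows of inmat, and the numeric phase inserts the rows with
   MatSetValues() and assembles the result.
*/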
2900 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2901 {
2902   PetscErrorCode ierr;
2903   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2904   PetscInt       *indx;
2905   PetscScalar    *values;
2906 
2907   PetscFunctionBegin;
2908   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2909   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2910     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2911     PetscInt       *dnz,*onz,sum,mbs,Nbs; /* bs,cbs from the enclosing scope are set by MatGetBlockSizes() below */
2912     PetscInt       *bindx,rmax=a->rmax,j;
2913 
2914     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2915     mbs = m/bs; Nbs = N/cbs;
2916     if (n == PETSC_DECIDE) {
2917       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2918     }
2919     /* Check sum(n) = Nbs */
2920     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2921     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);
2922 
2923     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2924     rstart -= mbs;
2925 
2926     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2927     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2928     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2929     for (i=0; i<mbs; i++) {
2930       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2931       nnz = nnz/bs;
2932       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2933       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2934       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2935     }
2936     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2937     ierr = PetscFree(bindx);CHKERRQ(ierr);
2938 
2939     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2940     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2941     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2942     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2943     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2944     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2945   }
2946 
2947   /* numeric phase */
2948   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2949   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2950 
2951   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2952   for (i=0; i<m; i++) {
2953     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2954     Ii   = i + rstart;
2955     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2956     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2957   }
2958   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2959   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963