xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b147fbf3aa2dcdf062a91dda937724f319a78674)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
78     ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
79     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
80   } else B = *newmat;
81 
82   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
84     PetscInt          ncols;
85     const PetscInt    *row;
86     const PetscScalar *vals;
87 
88     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
89     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
90     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
91     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
92   }
93   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
94   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
95   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96 
97   if (reuse == MAT_INPLACE_MATRIX) {
98     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
99   } else {
100     *newmat = B;
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
106 {
107   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
112   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
117 {
118   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
123   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
128   { \
129  \
130     brow = row/bs;  \
131     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
132     rmax = aimax[brow]; nrow = ailen[brow]; \
133     bcol = col/bs; \
134     ridx = row % bs; cidx = col % bs; \
135     low  = 0; high = nrow; \
136     while (high-low > 3) { \
137       t = (low+high)/2; \
138       if (rp[t] > bcol) high = t; \
139       else              low  = t; \
140     } \
141     for (_i=low; _i<high; _i++) { \
142       if (rp[_i] > bcol) break; \
143       if (rp[_i] == bcol) { \
144         bap = ap + bs2*_i + bs*cidx + ridx; \
145         if (addv == ADD_VALUES) *bap += value;  \
146         else                    *bap  = value;  \
147         goto a_noinsert; \
148       } \
149     } \
150     if (a->nonew == 1) goto a_noinsert; \
151     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
152     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
153     N = nrow++ - 1;  \
154     /* shift up all the later entries in this row */ \
155     for (ii=N; ii>=_i; ii--) { \
156       rp[ii+1] = rp[ii]; \
157       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
158     } \
159     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
160     rp[_i]                      = bcol;  \
161     ap[bs2*_i + bs*cidx + ridx] = value;  \
162     A->nonzerostate++;\
163 a_noinsert:; \
164     ailen[brow] = nrow; \
165   }
166 
167 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
168   { \
169     brow = row/bs;  \
170     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171     rmax = bimax[brow]; nrow = bilen[brow]; \
172     bcol = col/bs; \
173     ridx = row % bs; cidx = col % bs; \
174     low  = 0; high = nrow; \
175     while (high-low > 3) { \
176       t = (low+high)/2; \
177       if (rp[t] > bcol) high = t; \
178       else              low  = t; \
179     } \
180     for (_i=low; _i<high; _i++) { \
181       if (rp[_i] > bcol) break; \
182       if (rp[_i] == bcol) { \
183         bap = ap + bs2*_i + bs*cidx + ridx; \
184         if (addv == ADD_VALUES) *bap += value;  \
185         else                    *bap  = value;  \
186         goto b_noinsert; \
187       } \
188     } \
189     if (b->nonew == 1) goto b_noinsert; \
190     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
191     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
192     N = nrow++ - 1;  \
193     /* shift up all the later entries in this row */ \
194     for (ii=N; ii>=_i; ii--) { \
195       rp[ii+1] = rp[ii]; \
196       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
197     } \
198     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
199     rp[_i]                      = bcol;  \
200     ap[bs2*_i + bs*cidx + ridx] = value;  \
201     B->nonzerostate++;\
202 b_noinsert:; \
203     bilen[brow] = nrow; \
204   }
205 
206 /* Only add/insert a(i,j) with i<=j (blocks).
207    Any a(i,j) with i>j input by user is ingored.
208 */
209 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscBool      roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap->rstart;
217   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
218   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
219 
220   /* Some Variables required in the macro */
221   Mat          A     = baij->A;
222   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar    *aa   =a->a;
225 
226   Mat         B     = baij->B;
227   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar   *ba   =b->a;
230 
231   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar *ap,*bap;
234 
235   /* for stash */
236   PetscInt  n_loc, *in_loc = NULL;
237   MatScalar *v_loc = NULL;
238 
239   PetscFunctionBegin;
240   if (!baij->donotstash) {
241     if (n > baij->n_loc) {
242       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
243       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
244       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
246 
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs) {
262           if (a->ignore_ltriangular) {
263             continue;    /* ignore lower triangular blocks */
264           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
265         }
266         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
267           col  = in[j] - cstart_orig;         /* local col index */
268           brow = row/bs; bcol = col/bs;
269           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
270           if (roworiented) value = v[i*n+j];
271           else             value = v[i+j*m];
272           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
273           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
274         } else if (in[j] < 0) continue;
275 #if defined(PETSC_USE_DEBUG)
276         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
277 #endif
278         else {  /* off-diag entry (B) */
279           if (mat->was_assembled) {
280             if (!baij->colmap) {
281               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
282             }
283 #if defined(PETSC_USE_CTABLE)
284             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
285             col  = col - 1;
286 #else
287             col = baij->colmap[in[j]/bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
290               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
291               col  =  in[j];
292               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293               B    = baij->B;
294               b    = (Mat_SeqBAIJ*)(B)->data;
295               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
296               ba   = b->a;
297             } else col += in[j]%bs;
298           } else col = in[j];
299           if (roworiented) value = v[i*n+j];
300           else             value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
306       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
307       if (!baij->donotstash) {
308         mat->assembled = PETSC_FALSE;
309         n_loc          = 0;
310         for (j=0; j<n; j++) {
311           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
312           in_loc[n_loc] = in[j];
313           if (roworiented) {
314             v_loc[n_loc] = v[i*n+j];
315           } else {
316             v_loc[n_loc] = v[j*m+i];
317           }
318           n_loc++;
319         }
320         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
321       }
322     }
323   }
324   PetscFunctionReturn(0);
325 }
326 
327 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
328 {
329   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
330   PetscErrorCode    ierr;
331   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
332   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
333   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
334   PetscBool         roworiented=a->roworiented;
335   const PetscScalar *value     = v;
336   MatScalar         *ap,*aa = a->a,*bap;
337 
338   PetscFunctionBegin;
339   if (col < row) {
340     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
341     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
342   }
343   rp   = aj + ai[row];
344   ap   = aa + bs2*ai[row];
345   rmax = imax[row];
346   nrow = ailen[row];
347   value = v;
348   low   = 0;
349   high  = nrow;
350 
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++) {
377             for (jj=0; jj<bs; jj++) {
378               *bap++ += *value++;
379             }
380           }
381         } else {
382           for (ii=0; ii<bs; ii++) {
383             for (jj=0; jj<bs; jj++) {
384               *bap++  = *value++;
385             }
386           }
387         }
388       }
389       goto noinsert2;
390     }
391   }
392   if (nonew == 1) goto noinsert2;
393   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
394   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
395   N = nrow++ - 1; high++;
396   /* shift up all the later entries in this row */
397   for (ii=N; ii>=i; ii--) {
398     rp[ii+1] = rp[ii];
399     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
400   }
401   if (N >= i) {
402     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
403   }
404   rp[i] = col;
405   bap   = ap +  bs2*i;
406   if (roworiented) {
407     for (ii=0; ii<bs; ii++) {
408       for (jj=ii; jj<bs2; jj+=bs) {
409         bap[jj] = *value++;
410       }
411     }
412   } else {
413     for (ii=0; ii<bs; ii++) {
414       for (jj=0; jj<bs; jj++) {
415         *bap++ = *value++;
416       }
417     }
418   }
419   noinsert2:;
420   ailen[row] = nrow;
421   PetscFunctionReturn(0);
422 }
423 
424 /*
425    This routine is exactly duplicated in mpibaij.c
426 */
427 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
428 {
429   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
430   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
431   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
432   PetscErrorCode    ierr;
433   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
434   PetscBool         roworiented=a->roworiented;
435   const PetscScalar *value     = v;
436   MatScalar         *ap,*aa = a->a,*bap;
437 
438   PetscFunctionBegin;
439   rp   = aj + ai[row];
440   ap   = aa + bs2*ai[row];
441   rmax = imax[row];
442   nrow = ailen[row];
443   low  = 0;
444   high = nrow;
445   value = v;
446   while (high-low > 7) {
447     t = (low+high)/2;
448     if (rp[t] > col) high = t;
449     else             low  = t;
450   }
451   for (i=low; i<high; i++) {
452     if (rp[i] > col) break;
453     if (rp[i] == col) {
454       bap = ap +  bs2*i;
455       if (roworiented) {
456         if (is == ADD_VALUES) {
457           for (ii=0; ii<bs; ii++) {
458             for (jj=ii; jj<bs2; jj+=bs) {
459               bap[jj] += *value++;
460             }
461           }
462         } else {
463           for (ii=0; ii<bs; ii++) {
464             for (jj=ii; jj<bs2; jj+=bs) {
465               bap[jj] = *value++;
466             }
467           }
468         }
469       } else {
470         if (is == ADD_VALUES) {
471           for (ii=0; ii<bs; ii++,value+=bs) {
472             for (jj=0; jj<bs; jj++) {
473               bap[jj] += value[jj];
474             }
475             bap += bs;
476           }
477         } else {
478           for (ii=0; ii<bs; ii++,value+=bs) {
479             for (jj=0; jj<bs; jj++) {
480               bap[jj]  = value[jj];
481             }
482             bap += bs;
483           }
484         }
485       }
486       goto noinsert2;
487     }
488   }
489   if (nonew == 1) goto noinsert2;
490   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
491   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
492   N = nrow++ - 1; high++;
493   /* shift up all the later entries in this row */
494   for (ii=N; ii>=i; ii--) {
495     rp[ii+1] = rp[ii];
496     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
497   }
498   if (N >= i) {
499     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
500   }
501   rp[i] = col;
502   bap   = ap +  bs2*i;
503   if (roworiented) {
504     for (ii=0; ii<bs; ii++) {
505       for (jj=ii; jj<bs2; jj+=bs) {
506         bap[jj] = *value++;
507       }
508     }
509   } else {
510     for (ii=0; ii<bs; ii++) {
511       for (jj=0; jj<bs; jj++) {
512         *bap++ = *value++;
513       }
514     }
515   }
516   noinsert2:;
517   ailen[row] = nrow;
518   PetscFunctionReturn(0);
519 }
520 
521 /*
522     This routine could be optimized by removing the need for the block copy below and passing stride information
523   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
524 */
525 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
526 {
527   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
528   const MatScalar *value;
529   MatScalar       *barray     =baij->barray;
530   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
531   PetscErrorCode  ierr;
532   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
533   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
534   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
535 
536   PetscFunctionBegin;
537   if (!barray) {
538     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
539     baij->barray = barray;
540   }
541 
542   if (roworiented) {
543     stepval = (n-1)*bs;
544   } else {
545     stepval = (m-1)*bs;
546   }
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row = im[i] - rstart;
554       for (j=0; j<n; j++) {
555         if (im[i] > in[j]) {
556           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
557           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
558         }
559         /* If NumCol = 1 then a copy is not required */
560         if ((roworiented) && (n == 1)) {
561           barray = (MatScalar*) v + i*bs2;
562         } else if ((!roworiented) && (m == 1)) {
563           barray = (MatScalar*) v + j*bs2;
564         } else { /* Here a copy is required */
565           if (roworiented) {
566             value = v + i*(stepval+bs)*bs + j*bs;
567           } else {
568             value = v + j*(stepval+bs)*bs + i*bs;
569           }
570           for (ii=0; ii<bs; ii++,value+=stepval) {
571             for (jj=0; jj<bs; jj++) {
572               *barray++ = *value++;
573             }
574           }
575           barray -=bs2;
576         }
577 
578         if (in[j] >= cstart && in[j] < cend) {
579           col  = in[j] - cstart;
580           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
581         } else if (in[j] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584 #endif
585         else {
586           if (mat->was_assembled) {
587             if (!baij->colmap) {
588               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
589             }
590 
591 #if defined(PETSC_USE_DEBUG)
592 #if defined(PETSC_USE_CTABLE)
593             { PetscInt data;
594               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
595               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
596             }
597 #else
598             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
599 #endif
600 #endif
601 #if defined(PETSC_USE_CTABLE)
602             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
603             col  = (col - 1)/bs;
604 #else
605             col = (baij->colmap[in[j]] - 1)/bs;
606 #endif
607             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
608               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
609               col  = in[j];
610             }
611           } else col = in[j];
612           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
613         }
614       }
615     } else {
616       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
617       if (!baij->donotstash) {
618         if (roworiented) {
619           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
620         } else {
621           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
622         }
623       }
624     }
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
634   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
635 
636   PetscFunctionBegin;
637   for (i=0; i<m; i++) {
638     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
639     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
640     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
641       row = idxm[i] - bsrstart;
642       for (j=0; j<n; j++) {
643         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
644         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
645         if (idxn[j] >= bscstart && idxn[j] < bscend) {
646           col  = idxn[j] - bscstart;
647           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
648         } else {
649           if (!baij->colmap) {
650             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
651           }
652 #if defined(PETSC_USE_CTABLE)
653           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
654           data--;
655 #else
656           data = baij->colmap[idxn[j]/bs]-1;
657 #endif
658           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
659           else {
660             col  = data + idxn[j]%bs;
661             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
662           }
663         }
664       }
665     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
666   }
667   PetscFunctionReturn(0);
668 }
669 
670 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
671 {
672   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscReal      sum[2],*lnorm2;
675 
676   PetscFunctionBegin;
677   if (baij->size == 1) {
678     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
679   } else {
680     if (type == NORM_FROBENIUS) {
681       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
682       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
683       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
684       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
685       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
686       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
687       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
688       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
690       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
691       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
692       PetscReal    *rsum,*rsum2,vabs;
693       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
694       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
695       MatScalar    *v;
696 
697       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
698       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
699       /* Amat */
700       v = amat->a; jj = amat->j;
701       for (brow=0; brow<mbs; brow++) {
702         grow = bs*(rstart + brow);
703         nz   = amat->i[brow+1] - amat->i[brow];
704         for (bcol=0; bcol<nz; bcol++) {
705           gcol = bs*(rstart + *jj); jj++;
706           for (col=0; col<bs; col++) {
707             for (row=0; row<bs; row++) {
708               vabs            = PetscAbsScalar(*v); v++;
709               rsum[gcol+col] += vabs;
710               /* non-diagonal block */
711               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
712             }
713           }
714         }
715         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
716       }
717       /* Bmat */
718       v = bmat->a; jj = bmat->j;
719       for (brow=0; brow<mbs; brow++) {
720         grow = bs*(rstart + brow);
721         nz = bmat->i[brow+1] - bmat->i[brow];
722         for (bcol=0; bcol<nz; bcol++) {
723           gcol = bs*garray[*jj]; jj++;
724           for (col=0; col<bs; col++) {
725             for (row=0; row<bs; row++) {
726               vabs            = PetscAbsScalar(*v); v++;
727               rsum[gcol+col] += vabs;
728               rsum[grow+row] += vabs;
729             }
730           }
731         }
732         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
733       }
734       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
735       *norm = 0.0;
736       for (col=0; col<mat->cmap->N; col++) {
737         if (rsum2[col] > *norm) *norm = rsum2[col];
738       }
739       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
740     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749   PetscInt       nstash,reallocs;
750 
751   PetscFunctionBegin;
752   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
753 
754   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
755   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
756   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
757   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
758   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
759   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
764 {
765   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
766   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
767   PetscErrorCode ierr;
768   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
769   PetscInt       *row,*col;
770   PetscBool      other_disassembled;
771   PetscMPIInt    n;
772   PetscBool      r1,r2,r3;
773   MatScalar      *val;
774 
775   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
776   PetscFunctionBegin;
777   if (!baij->donotstash &&  !mat->nooffprocentries) {
778     while (1) {
779       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
780       if (!flg) break;
781 
782       for (i=0; i<n;) {
783         /* Now identify the consecutive vals belonging to the same row */
784         for (j=i,rstart=row[j]; j<n; j++) {
785           if (row[j] != rstart) break;
786         }
787         if (j < n) ncols = j-i;
788         else       ncols = n-i;
789         /* Now assemble all these values with a single function call */
790         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
791         i    = j;
792       }
793     }
794     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
795     /* Now process the block-stash. Since the values are stashed column-oriented,
796        set the roworiented flag to column oriented, and after MatSetValues()
797        restore the original flags */
798     r1 = baij->roworiented;
799     r2 = a->roworiented;
800     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
801 
802     baij->roworiented = PETSC_FALSE;
803     a->roworiented    = PETSC_FALSE;
804 
805     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
806     while (1) {
807       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
808       if (!flg) break;
809 
810       for (i=0; i<n;) {
811         /* Now identify the consecutive vals belonging to the same row */
812         for (j=i,rstart=row[j]; j<n; j++) {
813           if (row[j] != rstart) break;
814         }
815         if (j < n) ncols = j-i;
816         else       ncols = n-i;
817         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
818         i    = j;
819       }
820     }
821     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
822 
823     baij->roworiented = r1;
824     a->roworiented    = r2;
825 
826     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
827   }
828 
829   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
830   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
831 
832   /* determine if any processor has disassembled, if so we must
833      also disassemble ourselfs, in order that we may reassemble. */
834   /*
835      if nonzero structure of submatrix B cannot change then we know that
836      no processor disassembled thus we can skip this stuff
837   */
838   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
839     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
840     if (mat->was_assembled && !other_disassembled) {
841       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
842     }
843   }
844 
845   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
846     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
847   }
848   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
849   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
850 
851   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
852 
853   baij->rowvalues = 0;
854 
855   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
856   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
857     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
858     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
864 #include <petscdraw.h>
865 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
866 {
867   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
868   PetscErrorCode    ierr;
869   PetscInt          bs   = mat->rmap->bs;
870   PetscMPIInt       rank = baij->rank;
871   PetscBool         iascii,isdraw;
872   PetscViewer       sviewer;
873   PetscViewerFormat format;
874 
875   PetscFunctionBegin;
876   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
877   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
878   if (iascii) {
879     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
880     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
881       MatInfo info;
882       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
883       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
884       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
885       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
886       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
887       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
888       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
890       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
891       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
892       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
893       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
894       PetscFunctionReturn(0);
895     } else if (format == PETSC_VIEWER_ASCII_INFO) {
896       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
897       PetscFunctionReturn(0);
898     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
899       PetscFunctionReturn(0);
900     }
901   }
902 
903   if (isdraw) {
904     PetscDraw draw;
905     PetscBool isnull;
906     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
907     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
908     if (isnull) PetscFunctionReturn(0);
909   }
910 
911   {
912     /* assemble the entire matrix onto first processor. */
913     Mat          A;
914     Mat_SeqSBAIJ *Aloc;
915     Mat_SeqBAIJ  *Bloc;
916     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
917     MatScalar    *a;
918     const char   *matname;
919 
920     /* Should this be the same type as mat? */
921     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
922     if (!rank) {
923       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
924     } else {
925       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
926     }
927     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
928     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
929     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
930     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
931 
932     /* copy over the A part */
933     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
934     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
935     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
936 
937     for (i=0; i<mbs; i++) {
938       rvals[0] = bs*(baij->rstartbs + i);
939       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
940       for (j=ai[i]; j<ai[i+1]; j++) {
941         col = (baij->cstartbs+aj[j])*bs;
942         for (k=0; k<bs; k++) {
943           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
944           col++;
945           a += bs;
946         }
947       }
948     }
949     /* copy over the B part */
950     Bloc = (Mat_SeqBAIJ*)baij->B->data;
951     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
952     for (i=0; i<mbs; i++) {
953 
954       rvals[0] = bs*(baij->rstartbs + i);
955       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
956       for (j=ai[i]; j<ai[i+1]; j++) {
957         col = baij->garray[aj[j]]*bs;
958         for (k=0; k<bs; k++) {
959           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
960           col++;
961           a += bs;
962         }
963       }
964     }
965     ierr = PetscFree(rvals);CHKERRQ(ierr);
966     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     /*
969        Everyone has to call to draw the matrix since the graphics waits are
970        synchronized across all processors that share the PetscDraw object
971     */
972     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
973     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
974     if (!rank) {
975       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
976       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
977     }
978     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
979     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980     ierr = MatDestroy(&A);CHKERRQ(ierr);
981   }
982   PetscFunctionReturn(0);
983 }
984 
985 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
986 {
987   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
988   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
989   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
990   PetscErrorCode ierr;
991   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
992   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
993   int            fd;
994   PetscScalar    *column_values;
995   FILE           *file;
996   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
997   PetscInt       message_count,flowcontrolcount;
998 
999   PetscFunctionBegin;
1000   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1001   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1002   nz   = bs2*(A->nz + B->nz);
1003   rlen = mat->rmap->n;
1004   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1005   if (!rank) {
1006     header[0] = MAT_FILE_CLASSID;
1007     header[1] = mat->rmap->N;
1008     header[2] = mat->cmap->N;
1009 
1010     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1012     /* get largest number of rows any processor has */
1013     range = mat->rmap->range;
1014     for (i=1; i<size; i++) {
1015       rlen = PetscMax(rlen,range[i+1] - range[i]);
1016     }
1017   } else {
1018     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1019   }
1020 
1021   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1022   /* compute lengths of each row  */
1023   for (i=0; i<a->mbs; i++) {
1024     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1025   }
1026   /* store the row lengths to the file */
1027   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1028   if (!rank) {
1029     MPI_Status status;
1030     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1031     rlen = (range[1] - range[0])/bs;
1032     for (i=0; i<rlen; i++) {
1033       for (j=0; j<bs; j++) {
1034         row_lens[i*bs+j] = bs*crow_lens[i];
1035       }
1036     }
1037     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1038     for (i=1; i<size; i++) {
1039       rlen = (range[i+1] - range[i])/bs;
1040       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1041       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1042       for (k=0; k<rlen; k++) {
1043         for (j=0; j<bs; j++) {
1044           row_lens[k*bs+j] = bs*crow_lens[k];
1045         }
1046       }
1047       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1048     }
1049     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1050     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1051   } else {
1052     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1053     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1054     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1055   }
1056   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1057 
1058   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1059      information needed to make it for each row from a block row. This does require more communication but still not more than
1060      the communication needed for the nonzero values  */
1061   nzmax = nz; /*  space a largest processor needs */
1062   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1063   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1064   cnt   = 0;
1065   for (i=0; i<a->mbs; i++) {
1066     pcnt = cnt;
1067     for (j=B->i[i]; j<B->i[i+1]; j++) {
1068       if ((col = garray[B->j[j]]) > cstart) break;
1069       for (l=0; l<bs; l++) {
1070         column_indices[cnt++] = bs*col+l;
1071       }
1072     }
1073     for (k=A->i[i]; k<A->i[i+1]; k++) {
1074       for (l=0; l<bs; l++) {
1075         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1076       }
1077     }
1078     for (; j<B->i[i+1]; j++) {
1079       for (l=0; l<bs; l++) {
1080         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1081       }
1082     }
1083     len = cnt - pcnt;
1084     for (k=1; k<bs; k++) {
1085       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1086       cnt += len;
1087     }
1088   }
1089   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1090 
1091   /* store the columns to the file */
1092   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1093   if (!rank) {
1094     MPI_Status status;
1095     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1096     for (i=1; i<size; i++) {
1097       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1098       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1099       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1100       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1101     }
1102     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1105     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1106     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1107     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1108   }
1109   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1110 
1111   /* load up the numerical values */
1112   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1113   cnt  = 0;
1114   for (i=0; i<a->mbs; i++) {
1115     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1116     for (j=B->i[i]; j<B->i[i+1]; j++) {
1117       if (garray[B->j[j]] > cstart) break;
1118       for (l=0; l<bs; l++) {
1119         for (ll=0; ll<bs; ll++) {
1120           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1121         }
1122       }
1123       cnt += bs;
1124     }
1125     for (k=A->i[i]; k<A->i[i+1]; k++) {
1126       for (l=0; l<bs; l++) {
1127         for (ll=0; ll<bs; ll++) {
1128           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1129         }
1130       }
1131       cnt += bs;
1132     }
1133     for (; j<B->i[i+1]; j++) {
1134       for (l=0; l<bs; l++) {
1135         for (ll=0; ll<bs; ll++) {
1136           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1137         }
1138       }
1139       cnt += bs;
1140     }
1141     cnt += (bs-1)*rlen;
1142   }
1143   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1144 
1145   /* store the column values to the file */
1146   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1147   if (!rank) {
1148     MPI_Status status;
1149     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1150     for (i=1; i<size; i++) {
1151       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1152       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1153       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1154       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1155     }
1156     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1159     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1162   }
1163   ierr = PetscFree(column_values);CHKERRQ(ierr);
1164 
1165   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1166   if (file) {
1167     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1173 {
1174   PetscErrorCode ierr;
1175   PetscBool      iascii,isdraw,issocket,isbinary;
1176 
1177   PetscFunctionBegin;
1178   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1179   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1180   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1181   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1182   if (iascii || isdraw || issocket) {
1183     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1184   } else if (isbinary) {
1185     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1191 {
1192   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1198 #endif
1199   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1200   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1201   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1202   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1203 #if defined(PETSC_USE_CTABLE)
1204   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1205 #else
1206   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1207 #endif
1208   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1209   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1210   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1211   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1212   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1213   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1214   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1215   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1216   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1217   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1218   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1219   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1220   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1221   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1222   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1223 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1224   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1225 #endif
1226   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1227   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1228   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1229   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1230 
1231   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235 #if defined(PETSC_HAVE_ELEMENTAL)
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1237 #endif
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1244 {
1245   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1246   PetscErrorCode    ierr;
1247   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1248   PetscScalar       *from;
1249   const PetscScalar *x;
1250 
1251   PetscFunctionBegin;
1252   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1253   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1254 
1255   /* diagonal part */
1256   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1257   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1258 
1259   /* subdiagonal part */
1260   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1261 
1262   /* copy x into the vec slvec0 */
1263   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1264   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1265 
1266   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1267   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1268   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1269 
1270   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1271   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1272   /* supperdiagonal part */
1273   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1278 {
1279   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1280   PetscErrorCode    ierr;
1281   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1282   PetscScalar       *from;
1283   const PetscScalar *x;
1284 
1285   PetscFunctionBegin;
1286   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1287   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1288 
1289   /* diagonal part */
1290   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1291   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1292 
1293   /* subdiagonal part */
1294   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1295 
1296   /* copy x into the vec slvec0 */
1297   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1298   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1299 
1300   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1301   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1303 
1304   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   /* supperdiagonal part */
1307   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315   PetscInt       nt;
1316 
1317   PetscFunctionBegin;
1318   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1319   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1320 
1321   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1322   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1323 
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   /* do diagonal part */
1326   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1327   /* do supperdiagonal part */
1328   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1329   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1330   /* do subdiagonal part */
1331   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1332   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1333   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1338 {
1339   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1340   PetscErrorCode    ierr;
1341   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1342   PetscScalar       *from,zero=0.0;
1343   const PetscScalar *x;
1344 
1345   PetscFunctionBegin;
1346   /*
1347   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1348   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1349   */
1350   /* diagonal part */
1351   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1352   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1353 
1354   /* subdiagonal part */
1355   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1356 
1357   /* copy x into the vec slvec0 */
1358   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1359   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1360   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1361   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1362 
1363   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1364   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1365   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1366 
1367   /* supperdiagonal part */
1368   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1379   /* do diagonal part */
1380   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1381   /* do supperdiagonal part */
1382   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1383   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1384 
1385   /* do subdiagonal part */
1386   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1387   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1397 {
1398   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1403   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1414   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1419 {
1420   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1421   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1422   PetscErrorCode ierr;
1423   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1424   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1425   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1426 
1427   PetscFunctionBegin;
1428   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1429   mat->getrowactive = PETSC_TRUE;
1430 
1431   if (!mat->rowvalues && (idx || v)) {
1432     /*
1433         allocate enough space to hold information from the longest row.
1434     */
1435     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1436     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1437     PetscInt     max = 1,mbs = mat->mbs,tmp;
1438     for (i=0; i<mbs; i++) {
1439       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1440       if (max < tmp) max = tmp;
1441     }
1442     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1443   }
1444 
1445   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1446   lrow = row - brstart;  /* local row index */
1447 
1448   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1449   if (!v)   {pvA = 0; pvB = 0;}
1450   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1451   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1452   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1453   nztot = nzA + nzB;
1454 
1455   cmap = mat->garray;
1456   if (v  || idx) {
1457     if (nztot) {
1458       /* Sort by increasing column numbers, assuming A and B already sorted */
1459       PetscInt imark = -1;
1460       if (v) {
1461         *v = v_p = mat->rowvalues;
1462         for (i=0; i<nzB; i++) {
1463           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1464           else break;
1465         }
1466         imark = i;
1467         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1468         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1469       }
1470       if (idx) {
1471         *idx = idx_p = mat->rowindices;
1472         if (imark > -1) {
1473           for (i=0; i<imark; i++) {
1474             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1475           }
1476         } else {
1477           for (i=0; i<nzB; i++) {
1478             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479             else break;
1480           }
1481           imark = i;
1482         }
1483         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1484         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1485       }
1486     } else {
1487       if (idx) *idx = 0;
1488       if (v)   *v   = 0;
1489     }
1490   }
1491   *nz  = nztot;
1492   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1493   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1503   baij->getrowactive = PETSC_FALSE;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1508 {
1509   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1510   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1511 
1512   PetscFunctionBegin;
1513   aA->getrow_utriangular = PETSC_TRUE;
1514   PetscFunctionReturn(0);
1515 }
1516 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1517 {
1518   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1519   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1520 
1521   PetscFunctionBegin;
1522   aA->getrow_utriangular = PETSC_FALSE;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1527 {
1528   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1533   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1538 {
1539   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1544   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1549    Input: isrow       - distributed(parallel),
1550           iscol_local - locally owned (seq)
1551 */
1552 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1553 {
1554   PetscErrorCode ierr;
1555   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1556   const PetscInt *ptr1,*ptr2;
1557 
1558   PetscFunctionBegin;
1559   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1560   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1561   if (sz1 > sz2) {
1562     *flg = PETSC_FALSE;
1563     PetscFunctionReturn(0);
1564   }
1565 
1566   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1567   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1568 
1569   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1570   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1571   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1572   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1573   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1574   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1575 
1576   nmatch=0;
1577   k     = 0;
1578   for (i=0; i<sz1; i++){
1579     for (j=k; j<sz2; j++){
1580       if (a1[i] == a2[j]) {
1581         k = j; nmatch++;
1582         break;
1583       }
1584     }
1585   }
1586   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1587   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1588   ierr = PetscFree(a1);CHKERRQ(ierr);
1589   ierr = PetscFree(a2);CHKERRQ(ierr);
1590   if (nmatch < sz1) {
1591     *flg = PETSC_FALSE;
1592   } else {
1593     *flg = PETSC_TRUE;
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1599 {
1600   PetscErrorCode ierr;
1601   IS             iscol_local;
1602   PetscInt       csize;
1603   PetscBool      isequal;
1604 
1605   PetscFunctionBegin;
1606   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1607   if (call == MAT_REUSE_MATRIX) {
1608     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1609     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1610   } else {
1611     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1612     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1613     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1614   }
1615 
1616   /* now call MatCreateSubMatrix_MPIBAIJ() */
1617   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1618   if (call == MAT_INITIAL_MATRIX) {
1619     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1620     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1626 {
1627   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1632   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1637 {
1638   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1639   Mat            A  = a->A,B = a->B;
1640   PetscErrorCode ierr;
1641   PetscReal      isend[5],irecv[5];
1642 
1643   PetscFunctionBegin;
1644   info->block_size = (PetscReal)matin->rmap->bs;
1645 
1646   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649   isend[3] = info->memory;  isend[4] = info->mallocs;
1650 
1651   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654   isend[3] += info->memory;  isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1685 {
1686   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1687   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1688   PetscErrorCode ierr;
1689 
1690   PetscFunctionBegin;
1691   switch (op) {
1692   case MAT_NEW_NONZERO_LOCATIONS:
1693   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695   case MAT_KEEP_NONZERO_PATTERN:
1696   case MAT_SUBMAT_SINGLEIS:
1697   case MAT_NEW_NONZERO_LOCATION_ERR:
1698     MatCheckPreallocated(A,1);
1699     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1700     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1701     break;
1702   case MAT_ROW_ORIENTED:
1703     MatCheckPreallocated(A,1);
1704     a->roworiented = flg;
1705 
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_NEW_DIAGONALS:
1710     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1711     break;
1712   case MAT_IGNORE_OFF_PROC_ENTRIES:
1713     a->donotstash = flg;
1714     break;
1715   case MAT_USE_HASH_TABLE:
1716     a->ht_flag = flg;
1717     break;
1718   case MAT_HERMITIAN:
1719     MatCheckPreallocated(A,1);
1720     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722 #if defined(PETSC_USE_COMPLEX)
1723     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1724 #endif
1725     break;
1726   case MAT_SPD:
1727     A->spd_set = PETSC_TRUE;
1728     A->spd     = flg;
1729     if (flg) {
1730       A->symmetric                  = PETSC_TRUE;
1731       A->structurally_symmetric     = PETSC_TRUE;
1732       A->symmetric_set              = PETSC_TRUE;
1733       A->structurally_symmetric_set = PETSC_TRUE;
1734     }
1735     break;
1736   case MAT_SYMMETRIC:
1737     MatCheckPreallocated(A,1);
1738     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_STRUCTURALLY_SYMMETRIC:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_SYMMETRY_ETERNAL:
1745     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1746     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1747     break;
1748   case MAT_IGNORE_LOWER_TRIANGULAR:
1749     aA->ignore_ltriangular = flg;
1750     break;
1751   case MAT_ERROR_LOWER_TRIANGULAR:
1752     aA->ignore_ltriangular = flg;
1753     break;
1754   case MAT_GETROW_UPPERTRIANGULAR:
1755     aA->getrow_utriangular = flg;
1756     break;
1757   default:
1758     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   if (reuse == MAT_INITIAL_MATRIX) {
1769     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1770   }  else if (reuse == MAT_REUSE_MATRIX) {
1771     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1777 {
1778   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1779   Mat            a     = baij->A, b=baij->B;
1780   PetscErrorCode ierr;
1781   PetscInt       nv,m,n;
1782   PetscBool      flg;
1783 
1784   PetscFunctionBegin;
1785   if (ll != rr) {
1786     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1787     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1788   }
1789   if (!ll) PetscFunctionReturn(0);
1790 
1791   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1792   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1793 
1794   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1795   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1796 
1797   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1798 
1799   /* left diagonalscale the off-diagonal part */
1800   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1801 
1802   /* scale the diagonal part */
1803   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1804 
1805   /* right diagonalscale the off-diagonal part */
1806   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1807   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1812 {
1813   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1822 
1823 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1824 {
1825   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1826   Mat            a,b,c,d;
1827   PetscBool      flg;
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   a = matA->A; b = matA->B;
1832   c = matB->A; d = matB->B;
1833 
1834   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1835   if (flg) {
1836     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1837   }
1838   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1843 {
1844   PetscErrorCode ierr;
1845   PetscBool      isbaij;
1846 
1847   PetscFunctionBegin;
1848   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1849   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1850   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1851   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1852     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1853     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1854     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1855   } else {
1856     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1857     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1858 
1859     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1860     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1861   }
1862   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1867 {
1868   PetscErrorCode ierr;
1869 
1870   PetscFunctionBegin;
1871   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1876 {
1877   PetscErrorCode ierr;
1878   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1879   PetscBLASInt   bnz,one=1;
1880   Mat_SeqSBAIJ   *xa,*ya;
1881   Mat_SeqBAIJ    *xb,*yb;
1882 
1883   PetscFunctionBegin;
1884   if (str == SAME_NONZERO_PATTERN) {
1885     PetscScalar alpha = a;
1886     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1887     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1888     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1889     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1890     xb   = (Mat_SeqBAIJ*)xx->B->data;
1891     yb   = (Mat_SeqBAIJ*)yy->B->data;
1892     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1893     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1894     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1895   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1896     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1897     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1898     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1899   } else {
1900     Mat      B;
1901     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1902     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1903     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1904     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1905     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1906     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1907     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1908     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1909     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1910     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1911     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1912     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1913     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1914     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1915     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1916     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1917     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1918     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1919     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1920     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1921   }
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1926 {
1927   PetscErrorCode ierr;
1928   PetscInt       i;
1929   PetscBool      flg;
1930 
1931   PetscFunctionBegin;
1932   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1933   for (i=0; i<n; i++) {
1934     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1935     if (!flg) {
1936       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1937     }
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1943 {
1944   PetscErrorCode ierr;
1945   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1946   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1947 
1948   PetscFunctionBegin;
1949   if (!Y->preallocated) {
1950     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1951   } else if (!aij->nz) {
1952     PetscInt nonew = aij->nonew;
1953     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1954     aij->nonew = nonew;
1955   }
1956   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1961 {
1962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1967   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1968   if (d) {
1969     PetscInt rstart;
1970     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1971     *d += rstart/A->rmap->bs;
1972 
1973   }
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1978 {
1979   PetscFunctionBegin;
1980   *a = ((Mat_MPISBAIJ*)A->data)->A;
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 /* -------------------------------------------------------------------*/
1985 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1986                                        MatGetRow_MPISBAIJ,
1987                                        MatRestoreRow_MPISBAIJ,
1988                                        MatMult_MPISBAIJ,
1989                                /*  4*/ MatMultAdd_MPISBAIJ,
1990                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1991                                        MatMultAdd_MPISBAIJ,
1992                                        0,
1993                                        0,
1994                                        0,
1995                                /* 10*/ 0,
1996                                        0,
1997                                        0,
1998                                        MatSOR_MPISBAIJ,
1999                                        MatTranspose_MPISBAIJ,
2000                                /* 15*/ MatGetInfo_MPISBAIJ,
2001                                        MatEqual_MPISBAIJ,
2002                                        MatGetDiagonal_MPISBAIJ,
2003                                        MatDiagonalScale_MPISBAIJ,
2004                                        MatNorm_MPISBAIJ,
2005                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
2006                                        MatAssemblyEnd_MPISBAIJ,
2007                                        MatSetOption_MPISBAIJ,
2008                                        MatZeroEntries_MPISBAIJ,
2009                                /* 24*/ 0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                        0,
2014                                /* 29*/ MatSetUp_MPISBAIJ,
2015                                        0,
2016                                        0,
2017                                        MatGetDiagonalBlock_MPISBAIJ,
2018                                        0,
2019                                /* 34*/ MatDuplicate_MPISBAIJ,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /* 39*/ MatAXPY_MPISBAIJ,
2025                                        MatCreateSubMatrices_MPISBAIJ,
2026                                        MatIncreaseOverlap_MPISBAIJ,
2027                                        MatGetValues_MPISBAIJ,
2028                                        MatCopy_MPISBAIJ,
2029                                /* 44*/ 0,
2030                                        MatScale_MPISBAIJ,
2031                                        MatShift_MPISBAIJ,
2032                                        0,
2033                                        0,
2034                                /* 49*/ 0,
2035                                        0,
2036                                        0,
2037                                        0,
2038                                        0,
2039                                /* 54*/ 0,
2040                                        0,
2041                                        MatSetUnfactored_MPISBAIJ,
2042                                        0,
2043                                        MatSetValuesBlocked_MPISBAIJ,
2044                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2045                                        0,
2046                                        0,
2047                                        0,
2048                                        0,
2049                                /* 64*/ 0,
2050                                        0,
2051                                        0,
2052                                        0,
2053                                        0,
2054                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2055                                        0,
2056                                        0,
2057                                        0,
2058                                        0,
2059                                /* 74*/ 0,
2060                                        0,
2061                                        0,
2062                                        0,
2063                                        0,
2064                                /* 79*/ 0,
2065                                        0,
2066                                        0,
2067                                        0,
2068                                        MatLoad_MPISBAIJ,
2069                                /* 84*/ 0,
2070                                        0,
2071                                        0,
2072                                        0,
2073                                        0,
2074                                /* 89*/ 0,
2075                                        0,
2076                                        0,
2077                                        0,
2078                                        0,
2079                                /* 94*/ 0,
2080                                        0,
2081                                        0,
2082                                        0,
2083                                        0,
2084                                /* 99*/ 0,
2085                                        0,
2086                                        0,
2087                                        0,
2088                                        0,
2089                                /*104*/ 0,
2090                                        MatRealPart_MPISBAIJ,
2091                                        MatImaginaryPart_MPISBAIJ,
2092                                        MatGetRowUpperTriangular_MPISBAIJ,
2093                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2094                                /*109*/ 0,
2095                                        0,
2096                                        0,
2097                                        0,
2098                                        MatMissingDiagonal_MPISBAIJ,
2099                                /*114*/ 0,
2100                                        0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                /*119*/ 0,
2105                                        0,
2106                                        0,
2107                                        0,
2108                                        0,
2109                                /*124*/ 0,
2110                                        0,
2111                                        0,
2112                                        0,
2113                                        0,
2114                                /*129*/ 0,
2115                                        0,
2116                                        0,
2117                                        0,
2118                                        0,
2119                                /*134*/ 0,
2120                                        0,
2121                                        0,
2122                                        0,
2123                                        0,
2124                                /*139*/ MatSetBlockSizes_Default,
2125                                        0,
2126                                        0,
2127                                        0,
2128                                        0,
2129                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2130 };
2131 
2132 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2133 {
2134   Mat_MPISBAIJ   *b;
2135   PetscErrorCode ierr;
2136   PetscInt       i,mbs,Mbs;
2137 
2138   PetscFunctionBegin;
2139   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2140   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2141   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2142   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2143 
2144   b   = (Mat_MPISBAIJ*)B->data;
2145   mbs = B->rmap->n/bs;
2146   Mbs = B->rmap->N/bs;
2147   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2148 
2149   B->rmap->bs = bs;
2150   b->bs2      = bs*bs;
2151   b->mbs      = mbs;
2152   b->Mbs      = Mbs;
2153   b->nbs      = B->cmap->n/bs;
2154   b->Nbs      = B->cmap->N/bs;
2155 
2156   for (i=0; i<=b->size; i++) {
2157     b->rangebs[i] = B->rmap->range[i]/bs;
2158   }
2159   b->rstartbs = B->rmap->rstart/bs;
2160   b->rendbs   = B->rmap->rend/bs;
2161 
2162   b->cstartbs = B->cmap->rstart/bs;
2163   b->cendbs   = B->cmap->rend/bs;
2164 
2165 #if defined(PETSC_USE_CTABLE)
2166   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2167 #else
2168   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2169 #endif
2170   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2171   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2172   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2173   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2174   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2175   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2176   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2177   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2178   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2179 
2180   /* Because the B will have been resized we simply destroy it and create a new one each time */
2181   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2182   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2183   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2184   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2185   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2186 
2187   if (!B->preallocated) {
2188     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2189     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2190     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2191     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2192     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2193   }
2194 
2195   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2196   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2197 
2198   B->preallocated  = PETSC_TRUE;
2199   B->was_assembled = PETSC_FALSE;
2200   B->assembled     = PETSC_FALSE;
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2205 {
2206   PetscInt       m,rstart,cstart,cend;
2207   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2208   const PetscInt *JJ    =0;
2209   PetscScalar    *values=0;
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2214   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2215   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2216   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2217   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2218   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2219   m      = B->rmap->n/bs;
2220   rstart = B->rmap->rstart/bs;
2221   cstart = B->cmap->rstart/bs;
2222   cend   = B->cmap->rend/bs;
2223 
2224   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2225   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2226   for (i=0; i<m; i++) {
2227     nz = ii[i+1] - ii[i];
2228     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2229     nz_max = PetscMax(nz_max,nz);
2230     JJ     = jj + ii[i];
2231     for (j=0; j<nz; j++) {
2232       if (*JJ >= cstart) break;
2233       JJ++;
2234     }
2235     d = 0;
2236     for (; j<nz; j++) {
2237       if (*JJ++ >= cend) break;
2238       d++;
2239     }
2240     d_nnz[i] = d;
2241     o_nnz[i] = nz - d;
2242   }
2243   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2244   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2245 
2246   values = (PetscScalar*)V;
2247   if (!values) {
2248     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2249     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2250   }
2251   for (i=0; i<m; i++) {
2252     PetscInt          row    = i + rstart;
2253     PetscInt          ncols  = ii[i+1] - ii[i];
2254     const PetscInt    *icols = jj + ii[i];
2255     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2256     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2257   }
2258 
2259   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2260   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2261   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2262   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 /*MC
2267    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2268    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2269    the matrix is stored.
2270 
2271   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2272   can call MatSetOption(Mat, MAT_HERMITIAN);
2273 
2274    Options Database Keys:
2275 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2276 
2277   Level: beginner
2278 
2279 .seealso: MatCreateMPISBAIJ
2280 M*/
2281 
2282 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2283 {
2284   Mat_MPISBAIJ   *b;
2285   PetscErrorCode ierr;
2286   PetscBool      flg = PETSC_FALSE;
2287 
2288   PetscFunctionBegin;
2289   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2290   B->data = (void*)b;
2291   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2292 
2293   B->ops->destroy = MatDestroy_MPISBAIJ;
2294   B->ops->view    = MatView_MPISBAIJ;
2295   B->assembled    = PETSC_FALSE;
2296   B->insertmode   = NOT_SET_VALUES;
2297 
2298   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2299   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2300 
2301   /* build local table of row and column ownerships */
2302   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2303 
2304   /* build cache for off array entries formed */
2305   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2306 
2307   b->donotstash  = PETSC_FALSE;
2308   b->colmap      = NULL;
2309   b->garray      = NULL;
2310   b->roworiented = PETSC_TRUE;
2311 
2312   /* stuff used in block assembly */
2313   b->barray = 0;
2314 
2315   /* stuff used for matrix vector multiply */
2316   b->lvec    = 0;
2317   b->Mvctx   = 0;
2318   b->slvec0  = 0;
2319   b->slvec0b = 0;
2320   b->slvec1  = 0;
2321   b->slvec1a = 0;
2322   b->slvec1b = 0;
2323   b->sMvctx  = 0;
2324 
2325   /* stuff for MatGetRow() */
2326   b->rowindices   = 0;
2327   b->rowvalues    = 0;
2328   b->getrowactive = PETSC_FALSE;
2329 
2330   /* hash table stuff */
2331   b->ht           = 0;
2332   b->hd           = 0;
2333   b->ht_size      = 0;
2334   b->ht_flag      = PETSC_FALSE;
2335   b->ht_fact      = 0;
2336   b->ht_total_ct  = 0;
2337   b->ht_insert_ct = 0;
2338 
2339   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2340   b->ijonly = PETSC_FALSE;
2341 
2342   b->in_loc = 0;
2343   b->v_loc  = 0;
2344   b->n_loc  = 0;
2345 
2346   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2347   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2348   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2350 #if defined(PETSC_HAVE_ELEMENTAL)
2351   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2352 #endif
2353   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2354   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2355 
2356   B->symmetric                  = PETSC_TRUE;
2357   B->structurally_symmetric     = PETSC_TRUE;
2358   B->symmetric_set              = PETSC_TRUE;
2359   B->structurally_symmetric_set = PETSC_TRUE;
2360   B->symmetric_eternal          = PETSC_TRUE;
2361 
2362   B->hermitian                  = PETSC_FALSE;
2363   B->hermitian_set              = PETSC_FALSE;
2364 
2365   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2366   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2367   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2368   if (flg) {
2369     PetscReal fact = 1.39;
2370     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2371     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2372     if (fact <= 1.0) fact = 1.39;
2373     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2374     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2375   }
2376   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 /*MC
2381    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2382 
2383    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2384    and MATMPISBAIJ otherwise.
2385 
2386    Options Database Keys:
2387 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2388 
2389   Level: beginner
2390 
2391 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2392 M*/
2393 
2394 /*@C
2395    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2396    the user should preallocate the matrix storage by setting the parameters
2397    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2398    performance can be increased by more than a factor of 50.
2399 
2400    Collective on Mat
2401 
2402    Input Parameters:
2403 +  B - the matrix
2404 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2405           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2406 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2407            submatrix  (same for all local rows)
2408 .  d_nnz - array containing the number of block nonzeros in the various block rows
2409            in the upper triangular and diagonal part of the in diagonal portion of the local
2410            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2411            for the diagonal entry and set a value even if it is zero.
2412 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2413            submatrix (same for all local rows).
2414 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2415            off-diagonal portion of the local submatrix that is right of the diagonal
2416            (possibly different for each block row) or NULL.
2417 
2418 
2419    Options Database Keys:
2420 .   -mat_no_unroll - uses code that does not unroll the loops in the
2421                      block calculations (much slower)
2422 .   -mat_block_size - size of the blocks to use
2423 
2424    Notes:
2425 
2426    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2427    than it must be used on all processors that share the object for that argument.
2428 
2429    If the *_nnz parameter is given then the *_nz parameter is ignored
2430 
2431    Storage Information:
2432    For a square global matrix we define each processor's diagonal portion
2433    to be its local rows and the corresponding columns (a square submatrix);
2434    each processor's off-diagonal portion encompasses the remainder of the
2435    local matrix (a rectangular submatrix).
2436 
2437    The user can specify preallocated storage for the diagonal part of
2438    the local submatrix with either d_nz or d_nnz (not both).  Set
2439    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2440    memory allocation.  Likewise, specify preallocated storage for the
2441    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2442 
2443    You can call MatGetInfo() to get information on how effective the preallocation was;
2444    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2445    You can also run with the option -info and look for messages with the string
2446    malloc in them to see if additional memory allocation was needed.
2447 
2448    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2449    the figure below we depict these three local rows and all columns (0-11).
2450 
2451 .vb
2452            0 1 2 3 4 5 6 7 8 9 10 11
2453           --------------------------
2454    row 3  |. . . d d d o o o o  o  o
2455    row 4  |. . . d d d o o o o  o  o
2456    row 5  |. . . d d d o o o o  o  o
2457           --------------------------
2458 .ve
2459 
2460    Thus, any entries in the d locations are stored in the d (diagonal)
2461    submatrix, and any entries in the o locations are stored in the
2462    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2463    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2464 
2465    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2466    plus the diagonal part of the d matrix,
2467    and o_nz should indicate the number of block nonzeros per row in the o matrix
2468 
2469    In general, for PDE problems in which most nonzeros are near the diagonal,
2470    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2471    or you will get TERRIBLE performance; see the users' manual chapter on
2472    matrices.
2473 
2474    Level: intermediate
2475 
2476 .keywords: matrix, block, aij, compressed row, sparse, parallel
2477 
2478 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2479 @*/
2480 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2481 {
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2486   PetscValidType(B,1);
2487   PetscValidLogicalCollectiveInt(B,bs,2);
2488   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 /*@C
2493    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2494    (block compressed row).  For good matrix assembly performance
2495    the user should preallocate the matrix storage by setting the parameters
2496    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2497    performance can be increased by more than a factor of 50.
2498 
2499    Collective on MPI_Comm
2500 
2501    Input Parameters:
2502 +  comm - MPI communicator
2503 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2504           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2505 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2506            This value should be the same as the local size used in creating the
2507            y vector for the matrix-vector product y = Ax.
2508 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2509            This value should be the same as the local size used in creating the
2510            x vector for the matrix-vector product y = Ax.
2511 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2512 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2513 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2514            submatrix  (same for all local rows)
2515 .  d_nnz - array containing the number of block nonzeros in the various block rows
2516            in the upper triangular portion of the in diagonal portion of the local
2517            (possibly different for each block block row) or NULL.
2518            If you plan to factor the matrix you must leave room for the diagonal entry and
2519            set its value even if it is zero.
2520 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2521            submatrix (same for all local rows).
2522 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2523            off-diagonal portion of the local submatrix (possibly different for
2524            each block row) or NULL.
2525 
2526    Output Parameter:
2527 .  A - the matrix
2528 
2529    Options Database Keys:
2530 .   -mat_no_unroll - uses code that does not unroll the loops in the
2531                      block calculations (much slower)
2532 .   -mat_block_size - size of the blocks to use
2533 .   -mat_mpi - use the parallel matrix data structures even on one processor
2534                (defaults to using SeqBAIJ format on one processor)
2535 
2536    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2537    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2538    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2539 
2540    Notes:
2541    The number of rows and columns must be divisible by blocksize.
2542    This matrix type does not support complex Hermitian operation.
2543 
2544    The user MUST specify either the local or global matrix dimensions
2545    (possibly both).
2546 
2547    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2548    than it must be used on all processors that share the object for that argument.
2549 
2550    If the *_nnz parameter is given then the *_nz parameter is ignored
2551 
2552    Storage Information:
2553    For a square global matrix we define each processor's diagonal portion
2554    to be its local rows and the corresponding columns (a square submatrix);
2555    each processor's off-diagonal portion encompasses the remainder of the
2556    local matrix (a rectangular submatrix).
2557 
2558    The user can specify preallocated storage for the diagonal part of
2559    the local submatrix with either d_nz or d_nnz (not both).  Set
2560    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2561    memory allocation.  Likewise, specify preallocated storage for the
2562    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2563 
2564    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2565    the figure below we depict these three local rows and all columns (0-11).
2566 
2567 .vb
2568            0 1 2 3 4 5 6 7 8 9 10 11
2569           --------------------------
2570    row 3  |. . . d d d o o o o  o  o
2571    row 4  |. . . d d d o o o o  o  o
2572    row 5  |. . . d d d o o o o  o  o
2573           --------------------------
2574 .ve
2575 
2576    Thus, any entries in the d locations are stored in the d (diagonal)
2577    submatrix, and any entries in the o locations are stored in the
2578    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2579    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2580 
2581    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2582    plus the diagonal part of the d matrix,
2583    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2584    In general, for PDE problems in which most nonzeros are near the diagonal,
2585    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2586    or you will get TERRIBLE performance; see the users' manual chapter on
2587    matrices.
2588 
2589    Level: intermediate
2590 
2591 .keywords: matrix, block, aij, compressed row, sparse, parallel
2592 
2593 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2594 @*/
2595 
2596 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2597 {
2598   PetscErrorCode ierr;
2599   PetscMPIInt    size;
2600 
2601   PetscFunctionBegin;
2602   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2603   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2605   if (size > 1) {
2606     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2607     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2608   } else {
2609     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2610     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2611   }
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 
2616 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2617 {
2618   Mat            mat;
2619   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2620   PetscErrorCode ierr;
2621   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2622   PetscScalar    *array;
2623 
2624   PetscFunctionBegin;
2625   *newmat = 0;
2626 
2627   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2628   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2629   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2630   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2631   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2632 
2633   mat->factortype   = matin->factortype;
2634   mat->preallocated = PETSC_TRUE;
2635   mat->assembled    = PETSC_TRUE;
2636   mat->insertmode   = NOT_SET_VALUES;
2637 
2638   a      = (Mat_MPISBAIJ*)mat->data;
2639   a->bs2 = oldmat->bs2;
2640   a->mbs = oldmat->mbs;
2641   a->nbs = oldmat->nbs;
2642   a->Mbs = oldmat->Mbs;
2643   a->Nbs = oldmat->Nbs;
2644 
2645   a->size         = oldmat->size;
2646   a->rank         = oldmat->rank;
2647   a->donotstash   = oldmat->donotstash;
2648   a->roworiented  = oldmat->roworiented;
2649   a->rowindices   = 0;
2650   a->rowvalues    = 0;
2651   a->getrowactive = PETSC_FALSE;
2652   a->barray       = 0;
2653   a->rstartbs     = oldmat->rstartbs;
2654   a->rendbs       = oldmat->rendbs;
2655   a->cstartbs     = oldmat->cstartbs;
2656   a->cendbs       = oldmat->cendbs;
2657 
2658   /* hash table stuff */
2659   a->ht           = 0;
2660   a->hd           = 0;
2661   a->ht_size      = 0;
2662   a->ht_flag      = oldmat->ht_flag;
2663   a->ht_fact      = oldmat->ht_fact;
2664   a->ht_total_ct  = 0;
2665   a->ht_insert_ct = 0;
2666 
2667   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2668   if (oldmat->colmap) {
2669 #if defined(PETSC_USE_CTABLE)
2670     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2671 #else
2672     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2673     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2674     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2675 #endif
2676   } else a->colmap = 0;
2677 
2678   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2679     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2680     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2681     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2682   } else a->garray = 0;
2683 
2684   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2685   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2686   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2687   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2688   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2689 
2690   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2691   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2692   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2693   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2694 
2695   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2696   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2697   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2698   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2699   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2700   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2701   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2702   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2703   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2704   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2705   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2706   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2708 
2709   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2710   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2711   a->sMvctx = oldmat->sMvctx;
2712   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2713 
2714   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2715   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2716   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2717   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2718   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2719   *newmat = mat;
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2724 {
2725   PetscErrorCode ierr;
2726   PetscInt       i,nz,j,rstart,rend;
2727   PetscScalar    *vals,*buf;
2728   MPI_Comm       comm;
2729   MPI_Status     status;
2730   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2731   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2732   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2733   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2734   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2735   PetscInt       dcount,kmax,k,nzcount,tmp;
2736   int            fd;
2737   PetscBool      isbinary;
2738 
2739   PetscFunctionBegin;
2740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2741   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2742 
2743   /* force binary viewer to load .info file if it has not yet done so */
2744   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2745   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2746   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2747   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2748   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2749   if (bs < 0) bs = 1;
2750 
2751   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2752   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2753   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2754   if (!rank) {
2755     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2756     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2757     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2758   }
2759 
2760   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2761   M    = header[1];
2762   N    = header[2];
2763 
2764   /* If global sizes are set, check if they are consistent with that given in the file */
2765   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2766   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2767 
2768   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2769 
2770   /*
2771      This code adds extra rows to make sure the number of rows is
2772      divisible by the blocksize
2773   */
2774   Mbs        = M/bs;
2775   extra_rows = bs - M + bs*(Mbs);
2776   if (extra_rows == bs) extra_rows = 0;
2777   else                  Mbs++;
2778   if (extra_rows &&!rank) {
2779     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2780   }
2781 
2782   /* determine ownership of all rows */
2783   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2784     mbs = Mbs/size + ((Mbs % size) > rank);
2785     m   = mbs*bs;
2786   } else { /* User Set */
2787     m   = newmat->rmap->n;
2788     mbs = m/bs;
2789   }
2790   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2791   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2792   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2793   rowners[0] = 0;
2794   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2795   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2796   rstart = rowners[rank];
2797   rend   = rowners[rank+1];
2798 
2799   /* distribute row lengths to all processors */
2800   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2801   if (!rank) {
2802     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2803     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2804     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2805     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2806     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2807     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2808     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2809   } else {
2810     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   }
2812 
2813   if (!rank) {   /* procs[0] */
2814     /* calculate the number of nonzeros on each processor */
2815     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2816     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2817     for (i=0; i<size; i++) {
2818       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2819         procsnz[i] += rowlengths[j];
2820       }
2821     }
2822     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2823 
2824     /* determine max buffer needed and allocate it */
2825     maxnz = 0;
2826     for (i=0; i<size; i++) {
2827       maxnz = PetscMax(maxnz,procsnz[i]);
2828     }
2829     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2830 
2831     /* read in my part of the matrix column indices  */
2832     nz     = procsnz[0];
2833     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2834     mycols = ibuf;
2835     if (size == 1) nz -= extra_rows;
2836     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2837     if (size == 1) {
2838       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2839     }
2840 
2841     /* read in every ones (except the last) and ship off */
2842     for (i=1; i<size-1; i++) {
2843       nz   = procsnz[i];
2844       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2845       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2846     }
2847     /* read in the stuff for the last proc */
2848     if (size != 1) {
2849       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2850       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2851       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2852       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2853     }
2854     ierr = PetscFree(cols);CHKERRQ(ierr);
2855   } else {  /* procs[i], i>0 */
2856     /* determine buffer space needed for message */
2857     nz = 0;
2858     for (i=0; i<m; i++) nz += locrowlens[i];
2859     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2860     mycols = ibuf;
2861     /* receive message of column indices*/
2862     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2863     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2864     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2865   }
2866 
2867   /* loop over local rows, determining number of off diagonal entries */
2868   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2869   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2870   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2871   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2872   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2873   rowcount = 0;
2874   nzcount  = 0;
2875   for (i=0; i<mbs; i++) {
2876     dcount  = 0;
2877     odcount = 0;
2878     for (j=0; j<bs; j++) {
2879       kmax = locrowlens[rowcount];
2880       for (k=0; k<kmax; k++) {
2881         tmp = mycols[nzcount++]/bs; /* block col. index */
2882         if (!mask[tmp]) {
2883           mask[tmp] = 1;
2884           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2885           else masked1[dcount++] = tmp; /* entry in diag portion */
2886         }
2887       }
2888       rowcount++;
2889     }
2890 
2891     dlens[i]  = dcount;  /* d_nzz[i] */
2892     odlens[i] = odcount; /* o_nzz[i] */
2893 
2894     /* zero out the mask elements we set */
2895     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2896     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2897   }
2898   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2899   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2900   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2901 
2902   if (!rank) {
2903     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2904     /* read in my part of the matrix numerical values  */
2905     nz     = procsnz[0];
2906     vals   = buf;
2907     mycols = ibuf;
2908     if (size == 1) nz -= extra_rows;
2909     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2910     if (size == 1) {
2911       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2912     }
2913 
2914     /* insert into matrix */
2915     jj = rstart*bs;
2916     for (i=0; i<m; i++) {
2917       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2918       mycols += locrowlens[i];
2919       vals   += locrowlens[i];
2920       jj++;
2921     }
2922 
2923     /* read in other processors (except the last one) and ship out */
2924     for (i=1; i<size-1; i++) {
2925       nz   = procsnz[i];
2926       vals = buf;
2927       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2928       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2929     }
2930     /* the last proc */
2931     if (size != 1) {
2932       nz   = procsnz[i] - extra_rows;
2933       vals = buf;
2934       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2936       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2937     }
2938     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2939 
2940   } else {
2941     /* receive numeric values */
2942     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2943 
2944     /* receive message of values*/
2945     vals   = buf;
2946     mycols = ibuf;
2947     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2948     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2949     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2950 
2951     /* insert into matrix */
2952     jj = rstart*bs;
2953     for (i=0; i<m; i++) {
2954       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2955       mycols += locrowlens[i];
2956       vals   += locrowlens[i];
2957       jj++;
2958     }
2959   }
2960 
2961   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2962   ierr = PetscFree(buf);CHKERRQ(ierr);
2963   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2964   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2965   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2966   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2967   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2968   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 /*XXXXX@
2973    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2974 
2975    Input Parameters:
2976 .  mat  - the matrix
2977 .  fact - factor
2978 
2979    Not Collective on Mat, each process can have a different hash factor
2980 
2981    Level: advanced
2982 
2983   Notes:
2984    This can also be set by the command line option: -mat_use_hash_table fact
2985 
2986 .keywords: matrix, hashtable, factor, HT
2987 
2988 .seealso: MatSetOption()
2989 @XXXXX*/
2990 
2991 
2992 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2993 {
2994   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2995   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2996   PetscReal      atmp;
2997   PetscReal      *work,*svalues,*rvalues;
2998   PetscErrorCode ierr;
2999   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
3000   PetscMPIInt    rank,size;
3001   PetscInt       *rowners_bs,dest,count,source;
3002   PetscScalar    *va;
3003   MatScalar      *ba;
3004   MPI_Status     stat;
3005 
3006   PetscFunctionBegin;
3007   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3008   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
3009   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3010 
3011   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3012   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3013 
3014   bs  = A->rmap->bs;
3015   mbs = a->mbs;
3016   Mbs = a->Mbs;
3017   ba  = b->a;
3018   bi  = b->i;
3019   bj  = b->j;
3020 
3021   /* find ownerships */
3022   rowners_bs = A->rmap->range;
3023 
3024   /* each proc creates an array to be distributed */
3025   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3026   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
3027 
3028   /* row_max for B */
3029   if (rank != size-1) {
3030     for (i=0; i<mbs; i++) {
3031       ncols = bi[1] - bi[0]; bi++;
3032       brow  = bs*i;
3033       for (j=0; j<ncols; j++) {
3034         bcol = bs*(*bj);
3035         for (kcol=0; kcol<bs; kcol++) {
3036           col  = bcol + kcol;                /* local col index */
3037           col += rowners_bs[rank+1];      /* global col index */
3038           for (krow=0; krow<bs; krow++) {
3039             atmp = PetscAbsScalar(*ba); ba++;
3040             row  = brow + krow;   /* local row index */
3041             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3042             if (work[col] < atmp) work[col] = atmp;
3043           }
3044         }
3045         bj++;
3046       }
3047     }
3048 
3049     /* send values to its owners */
3050     for (dest=rank+1; dest<size; dest++) {
3051       svalues = work + rowners_bs[dest];
3052       count   = rowners_bs[dest+1]-rowners_bs[dest];
3053       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3054     }
3055   }
3056 
3057   /* receive values */
3058   if (rank) {
3059     rvalues = work;
3060     count   = rowners_bs[rank+1]-rowners_bs[rank];
3061     for (source=0; source<rank; source++) {
3062       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3063       /* process values */
3064       for (i=0; i<count; i++) {
3065         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3066       }
3067     }
3068   }
3069 
3070   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3071   ierr = PetscFree(work);CHKERRQ(ierr);
3072   PetscFunctionReturn(0);
3073 }
3074 
3075 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3076 {
3077   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3078   PetscErrorCode    ierr;
3079   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3080   PetscScalar       *x,*ptr,*from;
3081   Vec               bb1;
3082   const PetscScalar *b;
3083 
3084   PetscFunctionBegin;
3085   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3086   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3087 
3088   if (flag == SOR_APPLY_UPPER) {
3089     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3090     PetscFunctionReturn(0);
3091   }
3092 
3093   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3094     if (flag & SOR_ZERO_INITIAL_GUESS) {
3095       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3096       its--;
3097     }
3098 
3099     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3100     while (its--) {
3101 
3102       /* lower triangular part: slvec0b = - B^T*xx */
3103       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3104 
3105       /* copy xx into slvec0a */
3106       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3107       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3108       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3109       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3110 
3111       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3112 
3113       /* copy bb into slvec1a */
3114       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3115       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3116       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3117       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3118 
3119       /* set slvec1b = 0 */
3120       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3121 
3122       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3123       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3124       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3125       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126 
3127       /* upper triangular part: bb1 = bb1 - B*x */
3128       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3129 
3130       /* local diagonal sweep */
3131       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3132     }
3133     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3134   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3135     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3136   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3137     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3138   } else if (flag & SOR_EISENSTAT) {
3139     Vec               xx1;
3140     PetscBool         hasop;
3141     const PetscScalar *diag;
3142     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3143     PetscInt          i,n;
3144 
3145     if (!mat->xx1) {
3146       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3147       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3148     }
3149     xx1 = mat->xx1;
3150     bb1 = mat->bb1;
3151 
3152     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3153 
3154     if (!mat->diag) {
3155       /* this is wrong for same matrix with new nonzero values */
3156       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3157       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3158     }
3159     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3160 
3161     if (hasop) {
3162       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3163       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3164     } else {
3165       /*
3166           These two lines are replaced by code that may be a bit faster for a good compiler
3167       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3168       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3169       */
3170       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3171       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3172       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3173       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3174       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3175       if (omega == 1.0) {
3176         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3177         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3178       } else {
3179         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3180         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3181       }
3182       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3183       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3184       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3185       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3186     }
3187 
3188     /* multiply off-diagonal portion of matrix */
3189     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3190     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3191     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3192     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3193     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3194     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3195     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3196     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3197     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3198     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3199 
3200     /* local sweep */
3201     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3202     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3203   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3208 {
3209   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3210   PetscErrorCode ierr;
3211   Vec            lvec1,bb1;
3212 
3213   PetscFunctionBegin;
3214   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3215   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3216 
3217   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3218     if (flag & SOR_ZERO_INITIAL_GUESS) {
3219       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3220       its--;
3221     }
3222 
3223     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3224     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3225     while (its--) {
3226       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3227 
3228       /* lower diagonal part: bb1 = bb - B^T*xx */
3229       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3230       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3231 
3232       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3233       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3234       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3235 
3236       /* upper diagonal part: bb1 = bb1 - B*x */
3237       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3238       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3239 
3240       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3241 
3242       /* diagonal sweep */
3243       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3244     }
3245     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3246     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3247   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 /*@
3252      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3253          CSR format the local rows.
3254 
3255    Collective on MPI_Comm
3256 
3257    Input Parameters:
3258 +  comm - MPI communicator
3259 .  bs - the block size, only a block size of 1 is supported
3260 .  m - number of local rows (Cannot be PETSC_DECIDE)
3261 .  n - This value should be the same as the local size used in creating the
3262        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3263        calculated if N is given) For square matrices n is almost always m.
3264 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3265 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3266 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
3267 .   j - column indices
3268 -   a - matrix values
3269 
3270    Output Parameter:
3271 .   mat - the matrix
3272 
3273    Level: intermediate
3274 
3275    Notes:
3276        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3277      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3278      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3279 
3280        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3281 
3282 .keywords: matrix, aij, compressed row, sparse, parallel
3283 
3284 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3285           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3286 @*/
3287 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3288 {
3289   PetscErrorCode ierr;
3290 
3291 
3292   PetscFunctionBegin;
3293   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3294   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3295   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3296   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3297   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3298   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 
3303 /*@C
3304    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3305    (the default parallel PETSc format).
3306 
3307    Collective on MPI_Comm
3308 
3309    Input Parameters:
3310 +  B - the matrix
3311 .  bs - the block size
3312 .  i - the indices into j for the start of each local row (starts with zero)
3313 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3314 -  v - optional values in the matrix
3315 
3316    Level: developer
3317 
3318 .keywords: matrix, aij, compressed row, sparse, parallel
3319 
3320 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3321 @*/
3322 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3323 {
3324   PetscErrorCode ierr;
3325 
3326   PetscFunctionBegin;
3327   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3332 {
3333   PetscErrorCode ierr;
3334   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3335   PetscInt       *indx;
3336   PetscScalar    *values;
3337 
3338   PetscFunctionBegin;
3339   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3340   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3341     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3342     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3343     PetscInt       *bindx,rmax=a->rmax,j;
3344 
3345     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3346     mbs = m/bs; Nbs = N/cbs;
3347     if (n == PETSC_DECIDE) {
3348       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3349     }
3350     /* Check sum(n) = Nbs */
3351     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3352     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3353 
3354     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3355     rstart -= mbs;
3356 
3357     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3358     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3359     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3360     for (i=0; i<mbs; i++) {
3361       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3362       nnz  = nnz/bs;
3363       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3364       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3365       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3366     }
3367     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3368     ierr = PetscFree(bindx);CHKERRQ(ierr);
3369 
3370     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3371     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3372     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3373     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3374     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3375     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3376   }
3377 
3378   /* numeric phase */
3379   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3380   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3381 
3382   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3383   for (i=0; i<m; i++) {
3384     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3385     Ii   = i + rstart;
3386     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3387     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3388   }
3389   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3390   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3391   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3392   PetscFunctionReturn(0);
3393 }
3394