xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 7fc084c7ff669112ab219b565c558ccf433b7539)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
78     ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
79     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
80   } else B = *newmat;
81 
82   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
84     PetscInt          ncols;
85     const PetscInt    *row;
86     const PetscScalar *vals;
87 
88     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
89     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
90     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
91     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
92   }
93   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
94   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
95   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96 
97   if (reuse == MAT_INPLACE_MATRIX) {
98     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
99   } else {
100     *newmat = B;
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
106 {
107   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
112   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
117 {
118   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
123   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
128   { \
129     brow = row/bs;  \
130     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
131     rmax = aimax[brow]; nrow = ailen[brow]; \
132     bcol = col/bs; \
133     ridx = row % bs; cidx = col % bs; \
134     low  = 0; high = nrow; \
135     while (high-low > 3) { \
136       t = (low+high)/2; \
137       if (rp[t] > bcol) high = t; \
138       else              low  = t; \
139     } \
140     for (_i=low; _i<high; _i++) { \
141       if (rp[_i] > bcol) break; \
142       if (rp[_i] == bcol) { \
143         bap = ap + bs2*_i + bs*cidx + ridx; \
144         if (addv == ADD_VALUES) *bap += value;  \
145         else                    *bap  = value;  \
146         goto a_noinsert; \
147       } \
148     } \
149     if (a->nonew == 1) goto a_noinsert; \
150     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
151     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
152     N = nrow++ - 1;  \
153     /* shift up all the later entries in this row */ \
154     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
155     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
156     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
157     rp[_i]                      = bcol;  \
158     ap[bs2*_i + bs*cidx + ridx] = value;  \
159     A->nonzerostate++;\
160 a_noinsert:; \
161     ailen[brow] = nrow; \
162   }
163 
164 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
165   { \
166     brow = row/bs;  \
167     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
168     rmax = bimax[brow]; nrow = bilen[brow]; \
169     bcol = col/bs; \
170     ridx = row % bs; cidx = col % bs; \
171     low  = 0; high = nrow; \
172     while (high-low > 3) { \
173       t = (low+high)/2; \
174       if (rp[t] > bcol) high = t; \
175       else              low  = t; \
176     } \
177     for (_i=low; _i<high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2*_i + bs*cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value;  \
182         else                    *bap  = value;  \
183         goto b_noinsert; \
184       } \
185     } \
186     if (b->nonew == 1) goto b_noinsert; \
187     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
189     N = nrow++ - 1;  \
190     /* shift up all the later entries in this row */ \
191     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
192     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
193     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
194     rp[_i]                      = bcol;  \
195     ap[bs2*_i + bs*cidx + ridx] = value;  \
196     B->nonzerostate++;\
197 b_noinsert:; \
198     bilen[brow] = nrow; \
199   }
200 
201 /* Only add/insert a(i,j) with i<=j (blocks).
202    Any a(i,j) with i>j input by user is ingored or generates an error
203 */
204 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
205 {
206   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
207   MatScalar      value;
208   PetscBool      roworiented = baij->roworiented;
209   PetscErrorCode ierr;
210   PetscInt       i,j,row,col;
211   PetscInt       rstart_orig=mat->rmap->rstart;
212   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
213   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
214 
215   /* Some Variables required in the macro */
216   Mat          A     = baij->A;
217   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
218   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
219   MatScalar    *aa   =a->a;
220 
221   Mat         B     = baij->B;
222   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
223   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
224   MatScalar   *ba   =b->a;
225 
226   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
227   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
228   MatScalar *ap,*bap;
229 
230   /* for stash */
231   PetscInt  n_loc, *in_loc = NULL;
232   MatScalar *v_loc = NULL;
233 
234   PetscFunctionBegin;
235   if (!baij->donotstash) {
236     if (n > baij->n_loc) {
237       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
238       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
239       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
240       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
241 
242       baij->n_loc = n;
243     }
244     in_loc = baij->in_loc;
245     v_loc  = baij->v_loc;
246   }
247 
248   for (i=0; i<m; i++) {
249     if (im[i] < 0) continue;
250 #if defined(PETSC_USE_DEBUG)
251     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
252 #endif
253     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
254       row = im[i] - rstart_orig;              /* local row index */
255       for (j=0; j<n; j++) {
256         if (im[i]/bs > in[j]/bs) {
257           if (a->ignore_ltriangular) {
258             continue;    /* ignore lower triangular blocks */
259           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
260         }
261         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
262           col  = in[j] - cstart_orig;         /* local col index */
263           brow = row/bs; bcol = col/bs;
264           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
265           if (roworiented) value = v[i*n+j];
266           else             value = v[i+j*m];
267           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
268           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
269         } else if (in[j] < 0) continue;
270 #if defined(PETSC_USE_DEBUG)
271         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
272 #endif
273         else {  /* off-diag entry (B) */
274           if (mat->was_assembled) {
275             if (!baij->colmap) {
276               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
277             }
278 #if defined(PETSC_USE_CTABLE)
279             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
280             col  = col - 1;
281 #else
282             col = baij->colmap[in[j]/bs] - 1;
283 #endif
284             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
285               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
286               col  =  in[j];
287               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
288               B    = baij->B;
289               b    = (Mat_SeqBAIJ*)(B)->data;
290               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
291               ba   = b->a;
292             } else col += in[j]%bs;
293           } else col = in[j];
294           if (roworiented) value = v[i*n+j];
295           else             value = v[i+j*m];
296           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
297           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
298         }
299       }
300     } else {  /* off processor entry */
301       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
302       if (!baij->donotstash) {
303         mat->assembled = PETSC_FALSE;
304         n_loc          = 0;
305         for (j=0; j<n; j++) {
306           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
307           in_loc[n_loc] = in[j];
308           if (roworiented) {
309             v_loc[n_loc] = v[i*n+j];
310           } else {
311             v_loc[n_loc] = v[j*m+i];
312           }
313           n_loc++;
314         }
315         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
316       }
317     }
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
323 {
324   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
325   PetscErrorCode    ierr;
326   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
327   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
328   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
329   PetscBool         roworiented=a->roworiented;
330   const PetscScalar *value     = v;
331   MatScalar         *ap,*aa = a->a,*bap;
332 
333   PetscFunctionBegin;
334   if (col < row) {
335     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
336     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
337   }
338   rp   = aj + ai[row];
339   ap   = aa + bs2*ai[row];
340   rmax = imax[row];
341   nrow = ailen[row];
342   value = v;
343   low   = 0;
344   high  = nrow;
345 
346   while (high-low > 7) {
347     t = (low+high)/2;
348     if (rp[t] > col) high = t;
349     else             low  = t;
350   }
351   for (i=low; i<high; i++) {
352     if (rp[i] > col) break;
353     if (rp[i] == col) {
354       bap = ap +  bs2*i;
355       if (roworiented) {
356         if (is == ADD_VALUES) {
357           for (ii=0; ii<bs; ii++) {
358             for (jj=ii; jj<bs2; jj+=bs) {
359               bap[jj] += *value++;
360             }
361           }
362         } else {
363           for (ii=0; ii<bs; ii++) {
364             for (jj=ii; jj<bs2; jj+=bs) {
365               bap[jj] = *value++;
366             }
367           }
368         }
369       } else {
370         if (is == ADD_VALUES) {
371           for (ii=0; ii<bs; ii++) {
372             for (jj=0; jj<bs; jj++) {
373               *bap++ += *value++;
374             }
375           }
376         } else {
377           for (ii=0; ii<bs; ii++) {
378             for (jj=0; jj<bs; jj++) {
379               *bap++  = *value++;
380             }
381           }
382         }
383       }
384       goto noinsert2;
385     }
386   }
387   if (nonew == 1) goto noinsert2;
388   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
389   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
390   N = nrow++ - 1; high++;
391   /* shift up all the later entries in this row */
392   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
393   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
394   rp[i] = col;
395   bap   = ap +  bs2*i;
396   if (roworiented) {
397     for (ii=0; ii<bs; ii++) {
398       for (jj=ii; jj<bs2; jj+=bs) {
399         bap[jj] = *value++;
400       }
401     }
402   } else {
403     for (ii=0; ii<bs; ii++) {
404       for (jj=0; jj<bs; jj++) {
405         *bap++ = *value++;
406       }
407     }
408   }
409   noinsert2:;
410   ailen[row] = nrow;
411   PetscFunctionReturn(0);
412 }
413 
414 /*
415    This routine is exactly duplicated in mpibaij.c
416 */
417 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
418 {
419   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
420   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
421   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
422   PetscErrorCode    ierr;
423   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
424   PetscBool         roworiented=a->roworiented;
425   const PetscScalar *value     = v;
426   MatScalar         *ap,*aa = a->a,*bap;
427 
428   PetscFunctionBegin;
429   rp   = aj + ai[row];
430   ap   = aa + bs2*ai[row];
431   rmax = imax[row];
432   nrow = ailen[row];
433   low  = 0;
434   high = nrow;
435   value = v;
436   while (high-low > 7) {
437     t = (low+high)/2;
438     if (rp[t] > col) high = t;
439     else             low  = t;
440   }
441   for (i=low; i<high; i++) {
442     if (rp[i] > col) break;
443     if (rp[i] == col) {
444       bap = ap +  bs2*i;
445       if (roworiented) {
446         if (is == ADD_VALUES) {
447           for (ii=0; ii<bs; ii++) {
448             for (jj=ii; jj<bs2; jj+=bs) {
449               bap[jj] += *value++;
450             }
451           }
452         } else {
453           for (ii=0; ii<bs; ii++) {
454             for (jj=ii; jj<bs2; jj+=bs) {
455               bap[jj] = *value++;
456             }
457           }
458         }
459       } else {
460         if (is == ADD_VALUES) {
461           for (ii=0; ii<bs; ii++,value+=bs) {
462             for (jj=0; jj<bs; jj++) {
463               bap[jj] += value[jj];
464             }
465             bap += bs;
466           }
467         } else {
468           for (ii=0; ii<bs; ii++,value+=bs) {
469             for (jj=0; jj<bs; jj++) {
470               bap[jj]  = value[jj];
471             }
472             bap += bs;
473           }
474         }
475       }
476       goto noinsert2;
477     }
478   }
479   if (nonew == 1) goto noinsert2;
480   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
481   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
482   N = nrow++ - 1; high++;
483   /* shift up all the later entries in this row */
484   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
485   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
486   rp[i] = col;
487   bap   = ap +  bs2*i;
488   if (roworiented) {
489     for (ii=0; ii<bs; ii++) {
490       for (jj=ii; jj<bs2; jj+=bs) {
491         bap[jj] = *value++;
492       }
493     }
494   } else {
495     for (ii=0; ii<bs; ii++) {
496       for (jj=0; jj<bs; jj++) {
497         *bap++ = *value++;
498       }
499     }
500   }
501   noinsert2:;
502   ailen[row] = nrow;
503   PetscFunctionReturn(0);
504 }
505 
506 /*
507     This routine could be optimized by removing the need for the block copy below and passing stride information
508   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
509 */
510 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
511 {
512   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
513   const MatScalar *value;
514   MatScalar       *barray     =baij->barray;
515   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
516   PetscErrorCode  ierr;
517   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
518   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
519   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
520 
521   PetscFunctionBegin;
522   if (!barray) {
523     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
524     baij->barray = barray;
525   }
526 
527   if (roworiented) {
528     stepval = (n-1)*bs;
529   } else {
530     stepval = (m-1)*bs;
531   }
532   for (i=0; i<m; i++) {
533     if (im[i] < 0) continue;
534 #if defined(PETSC_USE_DEBUG)
535     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
536 #endif
537     if (im[i] >= rstart && im[i] < rend) {
538       row = im[i] - rstart;
539       for (j=0; j<n; j++) {
540         if (im[i] > in[j]) {
541           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
542           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
543         }
544         /* If NumCol = 1 then a copy is not required */
545         if ((roworiented) && (n == 1)) {
546           barray = (MatScalar*) v + i*bs2;
547         } else if ((!roworiented) && (m == 1)) {
548           barray = (MatScalar*) v + j*bs2;
549         } else { /* Here a copy is required */
550           if (roworiented) {
551             value = v + i*(stepval+bs)*bs + j*bs;
552           } else {
553             value = v + j*(stepval+bs)*bs + i*bs;
554           }
555           for (ii=0; ii<bs; ii++,value+=stepval) {
556             for (jj=0; jj<bs; jj++) {
557               *barray++ = *value++;
558             }
559           }
560           barray -=bs2;
561         }
562 
563         if (in[j] >= cstart && in[j] < cend) {
564           col  = in[j] - cstart;
565           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
566         } else if (in[j] < 0) continue;
567 #if defined(PETSC_USE_DEBUG)
568         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
569 #endif
570         else {
571           if (mat->was_assembled) {
572             if (!baij->colmap) {
573               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
574             }
575 
576 #if defined(PETSC_USE_DEBUG)
577 #if defined(PETSC_USE_CTABLE)
578             { PetscInt data;
579               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
580               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
581             }
582 #else
583             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
584 #endif
585 #endif
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col  = (col - 1)/bs;
589 #else
590             col = (baij->colmap[in[j]] - 1)/bs;
591 #endif
592             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
593               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
594               col  = in[j];
595             }
596           } else col = in[j];
597           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
598         }
599       }
600     } else {
601       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
602       if (!baij->donotstash) {
603         if (roworiented) {
604           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
605         } else {
606           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         }
608       }
609     }
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
615 {
616   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
617   PetscErrorCode ierr;
618   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
619   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
620 
621   PetscFunctionBegin;
622   for (i=0; i<m; i++) {
623     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
624     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
625     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
626       row = idxm[i] - bsrstart;
627       for (j=0; j<n; j++) {
628         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
629         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
630         if (idxn[j] >= bscstart && idxn[j] < bscend) {
631           col  = idxn[j] - bscstart;
632           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633         } else {
634           if (!baij->colmap) {
635             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
636           }
637 #if defined(PETSC_USE_CTABLE)
638           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
639           data--;
640 #else
641           data = baij->colmap[idxn[j]/bs]-1;
642 #endif
643           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
644           else {
645             col  = data + idxn[j]%bs;
646             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647           }
648         }
649       }
650     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
651   }
652   PetscFunctionReturn(0);
653 }
654 
655 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
656 {
657   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
658   PetscErrorCode ierr;
659   PetscReal      sum[2],*lnorm2;
660 
661   PetscFunctionBegin;
662   if (baij->size == 1) {
663     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
664   } else {
665     if (type == NORM_FROBENIUS) {
666       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
667       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
668       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
669       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
670       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
671       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
672       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
673       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
674     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
675       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
676       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
677       PetscReal    *rsum,*rsum2,vabs;
678       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
679       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
680       MatScalar    *v;
681 
682       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
683       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
684       /* Amat */
685       v = amat->a; jj = amat->j;
686       for (brow=0; brow<mbs; brow++) {
687         grow = bs*(rstart + brow);
688         nz   = amat->i[brow+1] - amat->i[brow];
689         for (bcol=0; bcol<nz; bcol++) {
690           gcol = bs*(rstart + *jj); jj++;
691           for (col=0; col<bs; col++) {
692             for (row=0; row<bs; row++) {
693               vabs            = PetscAbsScalar(*v); v++;
694               rsum[gcol+col] += vabs;
695               /* non-diagonal block */
696               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
697             }
698           }
699         }
700         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
701       }
702       /* Bmat */
703       v = bmat->a; jj = bmat->j;
704       for (brow=0; brow<mbs; brow++) {
705         grow = bs*(rstart + brow);
706         nz = bmat->i[brow+1] - bmat->i[brow];
707         for (bcol=0; bcol<nz; bcol++) {
708           gcol = bs*garray[*jj]; jj++;
709           for (col=0; col<bs; col++) {
710             for (row=0; row<bs; row++) {
711               vabs            = PetscAbsScalar(*v); v++;
712               rsum[gcol+col] += vabs;
713               rsum[grow+row] += vabs;
714             }
715           }
716         }
717         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
718       }
719       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
720       *norm = 0.0;
721       for (col=0; col<mat->cmap->N; col++) {
722         if (rsum2[col] > *norm) *norm = rsum2[col];
723       }
724       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
725     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
731 {
732   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
733   PetscErrorCode ierr;
734   PetscInt       nstash,reallocs;
735 
736   PetscFunctionBegin;
737   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
738 
739   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
740   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
741   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
742   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
743   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
744   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
749 {
750   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
751   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
752   PetscErrorCode ierr;
753   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
754   PetscInt       *row,*col;
755   PetscBool      other_disassembled;
756   PetscMPIInt    n;
757   PetscBool      r1,r2,r3;
758   MatScalar      *val;
759 
760   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
761   PetscFunctionBegin;
762   if (!baij->donotstash &&  !mat->nooffprocentries) {
763     while (1) {
764       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
765       if (!flg) break;
766 
767       for (i=0; i<n;) {
768         /* Now identify the consecutive vals belonging to the same row */
769         for (j=i,rstart=row[j]; j<n; j++) {
770           if (row[j] != rstart) break;
771         }
772         if (j < n) ncols = j-i;
773         else       ncols = n-i;
774         /* Now assemble all these values with a single function call */
775         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
776         i    = j;
777       }
778     }
779     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
780     /* Now process the block-stash. Since the values are stashed column-oriented,
781        set the roworiented flag to column oriented, and after MatSetValues()
782        restore the original flags */
783     r1 = baij->roworiented;
784     r2 = a->roworiented;
785     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
786 
787     baij->roworiented = PETSC_FALSE;
788     a->roworiented    = PETSC_FALSE;
789 
790     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
791     while (1) {
792       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
793       if (!flg) break;
794 
795       for (i=0; i<n;) {
796         /* Now identify the consecutive vals belonging to the same row */
797         for (j=i,rstart=row[j]; j<n; j++) {
798           if (row[j] != rstart) break;
799         }
800         if (j < n) ncols = j-i;
801         else       ncols = n-i;
802         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
803         i    = j;
804       }
805     }
806     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
807 
808     baij->roworiented = r1;
809     a->roworiented    = r2;
810 
811     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
812   }
813 
814   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled, if so we must
818      also disassemble ourselfs, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829 
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
831     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
832   }
833   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
837 
838   baij->rowvalues = 0;
839 
840   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
841   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
842     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
843     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
849 #include <petscdraw.h>
850 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
851 {
852   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
853   PetscErrorCode    ierr;
854   PetscInt          bs   = mat->rmap->bs;
855   PetscMPIInt       rank = baij->rank;
856   PetscBool         iascii,isdraw;
857   PetscViewer       sviewer;
858   PetscViewerFormat format;
859 
860   PetscFunctionBegin;
861   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
862   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
863   if (iascii) {
864     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
865     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
866       MatInfo info;
867       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
868       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
869       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
870       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
871       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
872       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
873       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
875       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
876       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
877       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
878       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == PETSC_VIEWER_ASCII_INFO) {
881       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
882       PetscFunctionReturn(0);
883     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
884       PetscFunctionReturn(0);
885     }
886   }
887 
888   if (isdraw) {
889     PetscDraw draw;
890     PetscBool isnull;
891     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
892     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
893     if (isnull) PetscFunctionReturn(0);
894   }
895 
896   {
897     /* assemble the entire matrix onto first processor. */
898     Mat          A;
899     Mat_SeqSBAIJ *Aloc;
900     Mat_SeqBAIJ  *Bloc;
901     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
902     MatScalar    *a;
903     const char   *matname;
904 
905     /* Should this be the same type as mat? */
906     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
907     if (!rank) {
908       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
909     } else {
910       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
911     }
912     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
913     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
914     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
915     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
916 
917     /* copy over the A part */
918     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
919     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
920     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
921 
922     for (i=0; i<mbs; i++) {
923       rvals[0] = bs*(baij->rstartbs + i);
924       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
925       for (j=ai[i]; j<ai[i+1]; j++) {
926         col = (baij->cstartbs+aj[j])*bs;
927         for (k=0; k<bs; k++) {
928           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
929           col++;
930           a += bs;
931         }
932       }
933     }
934     /* copy over the B part */
935     Bloc = (Mat_SeqBAIJ*)baij->B->data;
936     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
937     for (i=0; i<mbs; i++) {
938 
939       rvals[0] = bs*(baij->rstartbs + i);
940       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
941       for (j=ai[i]; j<ai[i+1]; j++) {
942         col = baij->garray[aj[j]]*bs;
943         for (k=0; k<bs; k++) {
944           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
945           col++;
946           a += bs;
947         }
948       }
949     }
950     ierr = PetscFree(rvals);CHKERRQ(ierr);
951     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
952     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     /*
954        Everyone has to call to draw the matrix since the graphics waits are
955        synchronized across all processors that share the PetscDraw object
956     */
957     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
958     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
961       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
964     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
965     ierr = MatDestroy(&A);CHKERRQ(ierr);
966   }
967   PetscFunctionReturn(0);
968 }
969 
970 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
971 {
972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
973   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
974   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
975   PetscErrorCode ierr;
976   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
977   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
978   int            fd;
979   PetscScalar    *column_values;
980   FILE           *file;
981   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
982   PetscInt       message_count,flowcontrolcount;
983 
984   PetscFunctionBegin;
985   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
986   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
987   nz   = bs2*(A->nz + B->nz);
988   rlen = mat->rmap->n;
989   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
990   if (!rank) {
991     header[0] = MAT_FILE_CLASSID;
992     header[1] = mat->rmap->N;
993     header[2] = mat->cmap->N;
994 
995     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
996     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     /* get largest number of rows any processor has */
998     range = mat->rmap->range;
999     for (i=1; i<size; i++) {
1000       rlen = PetscMax(rlen,range[i+1] - range[i]);
1001     }
1002   } else {
1003     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1004   }
1005 
1006   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1007   /* compute lengths of each row  */
1008   for (i=0; i<a->mbs; i++) {
1009     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1010   }
1011   /* store the row lengths to the file */
1012   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1013   if (!rank) {
1014     MPI_Status status;
1015     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1016     rlen = (range[1] - range[0])/bs;
1017     for (i=0; i<rlen; i++) {
1018       for (j=0; j<bs; j++) {
1019         row_lens[i*bs+j] = bs*crow_lens[i];
1020       }
1021     }
1022     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       rlen = (range[i+1] - range[i])/bs;
1025       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1026       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1027       for (k=0; k<rlen; k++) {
1028         for (j=0; j<bs; j++) {
1029           row_lens[k*bs+j] = bs*crow_lens[k];
1030         }
1031       }
1032       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1033     }
1034     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1035     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1036   } else {
1037     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1038     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1039     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1040   }
1041   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1042 
1043   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1044      information needed to make it for each row from a block row. This does require more communication but still not more than
1045      the communication needed for the nonzero values  */
1046   nzmax = nz; /*  space a largest processor needs */
1047   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1048   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1049   cnt   = 0;
1050   for (i=0; i<a->mbs; i++) {
1051     pcnt = cnt;
1052     for (j=B->i[i]; j<B->i[i+1]; j++) {
1053       if ((col = garray[B->j[j]]) > cstart) break;
1054       for (l=0; l<bs; l++) {
1055         column_indices[cnt++] = bs*col+l;
1056       }
1057     }
1058     for (k=A->i[i]; k<A->i[i+1]; k++) {
1059       for (l=0; l<bs; l++) {
1060         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1061       }
1062     }
1063     for (; j<B->i[i+1]; j++) {
1064       for (l=0; l<bs; l++) {
1065         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1066       }
1067     }
1068     len = cnt - pcnt;
1069     for (k=1; k<bs; k++) {
1070       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1071       cnt += len;
1072     }
1073   }
1074   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1075 
1076   /* store the columns to the file */
1077   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1078   if (!rank) {
1079     MPI_Status status;
1080     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1081     for (i=1; i<size; i++) {
1082       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1083       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1084       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1085       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1086     }
1087     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1088   } else {
1089     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1090     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1091     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1092     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1095 
1096   /* load up the numerical values */
1097   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1098   cnt  = 0;
1099   for (i=0; i<a->mbs; i++) {
1100     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1101     for (j=B->i[i]; j<B->i[i+1]; j++) {
1102       if (garray[B->j[j]] > cstart) break;
1103       for (l=0; l<bs; l++) {
1104         for (ll=0; ll<bs; ll++) {
1105           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1106         }
1107       }
1108       cnt += bs;
1109     }
1110     for (k=A->i[i]; k<A->i[i+1]; k++) {
1111       for (l=0; l<bs; l++) {
1112         for (ll=0; ll<bs; ll++) {
1113           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1114         }
1115       }
1116       cnt += bs;
1117     }
1118     for (; j<B->i[i+1]; j++) {
1119       for (l=0; l<bs; l++) {
1120         for (ll=0; ll<bs; ll++) {
1121           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1122         }
1123       }
1124       cnt += bs;
1125     }
1126     cnt += (bs-1)*rlen;
1127   }
1128   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1129 
1130   /* store the column values to the file */
1131   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1132   if (!rank) {
1133     MPI_Status status;
1134     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1135     for (i=1; i<size; i++) {
1136       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1137       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1138       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1139       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1140     }
1141     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1142   } else {
1143     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1144     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1145     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1146     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1147   }
1148   ierr = PetscFree(column_values);CHKERRQ(ierr);
1149 
1150   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1151   if (file) {
1152     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1153   }
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1158 {
1159   PetscErrorCode ierr;
1160   PetscBool      iascii,isdraw,issocket,isbinary;
1161 
1162   PetscFunctionBegin;
1163   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1164   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1165   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1166   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1167   if (iascii || isdraw || issocket) {
1168     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1169   } else if (isbinary) {
1170     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1176 {
1177   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181 #if defined(PETSC_USE_LOG)
1182   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1183 #endif
1184   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1185   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1186   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1187   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1188 #if defined(PETSC_USE_CTABLE)
1189   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1190 #else
1191   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1192 #endif
1193   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1194   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1195   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1196   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1197   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1198   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1199   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1200   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1201   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1202   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1203   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1204   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1205   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1206   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1207   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1208 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1209   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1210 #endif
1211   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1212   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1213   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1214   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1215 
1216   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220 #if defined(PETSC_HAVE_ELEMENTAL)
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1222 #endif
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1229 {
1230   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1231   PetscErrorCode    ierr;
1232   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1233   PetscScalar       *from;
1234   const PetscScalar *x;
1235 
1236   PetscFunctionBegin;
1237   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1238   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1239 
1240   /* diagonal part */
1241   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1242   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1243 
1244   /* subdiagonal part */
1245   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1246 
1247   /* copy x into the vec slvec0 */
1248   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1249   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1250 
1251   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1252   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1253   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1254 
1255   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257   /* supperdiagonal part */
1258   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1263 {
1264   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1265   PetscErrorCode    ierr;
1266   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1267   PetscScalar       *from;
1268   const PetscScalar *x;
1269 
1270   PetscFunctionBegin;
1271   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1272   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1273 
1274   /* diagonal part */
1275   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1276   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1277 
1278   /* subdiagonal part */
1279   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1280 
1281   /* copy x into the vec slvec0 */
1282   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1283   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1284 
1285   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1286   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1287   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1288 
1289   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1290   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1291   /* supperdiagonal part */
1292   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1297 {
1298   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1299   PetscErrorCode ierr;
1300   PetscInt       nt;
1301 
1302   PetscFunctionBegin;
1303   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1304   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1305 
1306   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1307   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1308 
1309   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1310   /* do diagonal part */
1311   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1312   /* do supperdiagonal part */
1313   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1314   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1315   /* do subdiagonal part */
1316   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1317   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1318   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1323 {
1324   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1325   PetscErrorCode    ierr;
1326   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1327   PetscScalar       *from,zero=0.0;
1328   const PetscScalar *x;
1329 
1330   PetscFunctionBegin;
1331   /*
1332   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1333   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1334   */
1335   /* diagonal part */
1336   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1337   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1338 
1339   /* subdiagonal part */
1340   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1341 
1342   /* copy x into the vec slvec0 */
1343   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1344   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1345   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1346   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1347 
1348   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1349   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1350   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1351 
1352   /* supperdiagonal part */
1353   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1358 {
1359   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1364   /* do diagonal part */
1365   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1366   /* do supperdiagonal part */
1367   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1368   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1369 
1370   /* do subdiagonal part */
1371   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1372   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 /*
1378   This only works correctly for square matrices where the subblock A->A is the
1379    diagonal block
1380 */
1381 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1382 {
1383   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1384   PetscErrorCode ierr;
1385 
1386   PetscFunctionBegin;
1387   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1388   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1393 {
1394   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1395   PetscErrorCode ierr;
1396 
1397   PetscFunctionBegin;
1398   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1399   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1404 {
1405   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1406   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1407   PetscErrorCode ierr;
1408   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1409   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1410   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1411 
1412   PetscFunctionBegin;
1413   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1414   mat->getrowactive = PETSC_TRUE;
1415 
1416   if (!mat->rowvalues && (idx || v)) {
1417     /*
1418         allocate enough space to hold information from the longest row.
1419     */
1420     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1421     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1422     PetscInt     max = 1,mbs = mat->mbs,tmp;
1423     for (i=0; i<mbs; i++) {
1424       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1425       if (max < tmp) max = tmp;
1426     }
1427     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1428   }
1429 
1430   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1431   lrow = row - brstart;  /* local row index */
1432 
1433   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1434   if (!v)   {pvA = 0; pvB = 0;}
1435   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1436   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1437   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1438   nztot = nzA + nzB;
1439 
1440   cmap = mat->garray;
1441   if (v  || idx) {
1442     if (nztot) {
1443       /* Sort by increasing column numbers, assuming A and B already sorted */
1444       PetscInt imark = -1;
1445       if (v) {
1446         *v = v_p = mat->rowvalues;
1447         for (i=0; i<nzB; i++) {
1448           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1449           else break;
1450         }
1451         imark = i;
1452         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1453         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1454       }
1455       if (idx) {
1456         *idx = idx_p = mat->rowindices;
1457         if (imark > -1) {
1458           for (i=0; i<imark; i++) {
1459             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1460           }
1461         } else {
1462           for (i=0; i<nzB; i++) {
1463             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1464             else break;
1465           }
1466           imark = i;
1467         }
1468         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1469         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1470       }
1471     } else {
1472       if (idx) *idx = 0;
1473       if (v)   *v   = 0;
1474     }
1475   }
1476   *nz  = nztot;
1477   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1478   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1483 {
1484   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1485 
1486   PetscFunctionBegin;
1487   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1488   baij->getrowactive = PETSC_FALSE;
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1493 {
1494   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1495   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1496 
1497   PetscFunctionBegin;
1498   aA->getrow_utriangular = PETSC_TRUE;
1499   PetscFunctionReturn(0);
1500 }
1501 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1502 {
1503   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1504   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1505 
1506   PetscFunctionBegin;
1507   aA->getrow_utriangular = PETSC_FALSE;
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1512 {
1513   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1514   PetscErrorCode ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1518   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1523 {
1524   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1529   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1534    Input: isrow       - distributed(parallel),
1535           iscol_local - locally owned (seq)
1536 */
1537 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1538 {
1539   PetscErrorCode ierr;
1540   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1541   const PetscInt *ptr1,*ptr2;
1542 
1543   PetscFunctionBegin;
1544   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1545   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1546   if (sz1 > sz2) {
1547     *flg = PETSC_FALSE;
1548     PetscFunctionReturn(0);
1549   }
1550 
1551   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1552   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1553 
1554   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1555   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1556   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1557   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1558   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1559   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1560 
1561   nmatch=0;
1562   k     = 0;
1563   for (i=0; i<sz1; i++){
1564     for (j=k; j<sz2; j++){
1565       if (a1[i] == a2[j]) {
1566         k = j; nmatch++;
1567         break;
1568       }
1569     }
1570   }
1571   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1572   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1573   ierr = PetscFree(a1);CHKERRQ(ierr);
1574   ierr = PetscFree(a2);CHKERRQ(ierr);
1575   if (nmatch < sz1) {
1576     *flg = PETSC_FALSE;
1577   } else {
1578     *flg = PETSC_TRUE;
1579   }
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1584 {
1585   PetscErrorCode ierr;
1586   IS             iscol_local;
1587   PetscInt       csize;
1588   PetscBool      isequal;
1589 
1590   PetscFunctionBegin;
1591   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1592   if (call == MAT_REUSE_MATRIX) {
1593     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1594     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1595   } else {
1596     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1597     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1598     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1599   }
1600 
1601   /* now call MatCreateSubMatrix_MPIBAIJ() */
1602   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1603   if (call == MAT_INITIAL_MATRIX) {
1604     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1605     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1611 {
1612   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614 
1615   PetscFunctionBegin;
1616   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1617   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1622 {
1623   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1624   Mat            A  = a->A,B = a->B;
1625   PetscErrorCode ierr;
1626   PetscReal      isend[5],irecv[5];
1627 
1628   PetscFunctionBegin;
1629   info->block_size = (PetscReal)matin->rmap->bs;
1630 
1631   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1634   isend[3] = info->memory;  isend[4] = info->mallocs;
1635 
1636   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1637 
1638   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1639   isend[3] += info->memory;  isend[4] += info->mallocs;
1640   if (flag == MAT_LOCAL) {
1641     info->nz_used      = isend[0];
1642     info->nz_allocated = isend[1];
1643     info->nz_unneeded  = isend[2];
1644     info->memory       = isend[3];
1645     info->mallocs      = isend[4];
1646   } else if (flag == MAT_GLOBAL_MAX) {
1647     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   } else if (flag == MAT_GLOBAL_SUM) {
1655     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1656 
1657     info->nz_used      = irecv[0];
1658     info->nz_allocated = irecv[1];
1659     info->nz_unneeded  = irecv[2];
1660     info->memory       = irecv[3];
1661     info->mallocs      = irecv[4];
1662   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1663   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1664   info->fill_ratio_needed = 0;
1665   info->factor_mallocs    = 0;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1670 {
1671   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1672   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   switch (op) {
1677   case MAT_NEW_NONZERO_LOCATIONS:
1678   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1679   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1680   case MAT_KEEP_NONZERO_PATTERN:
1681   case MAT_SUBMAT_SINGLEIS:
1682   case MAT_NEW_NONZERO_LOCATION_ERR:
1683     MatCheckPreallocated(A,1);
1684     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1685     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_ROW_ORIENTED:
1688     MatCheckPreallocated(A,1);
1689     a->roworiented = flg;
1690 
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1693     break;
1694   case MAT_NEW_DIAGONALS:
1695   case MAT_SORTED_FULL:
1696     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1697     break;
1698   case MAT_IGNORE_OFF_PROC_ENTRIES:
1699     a->donotstash = flg;
1700     break;
1701   case MAT_USE_HASH_TABLE:
1702     a->ht_flag = flg;
1703     break;
1704   case MAT_HERMITIAN:
1705     MatCheckPreallocated(A,1);
1706     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1707     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1708 #if defined(PETSC_USE_COMPLEX)
1709     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1710 #endif
1711     break;
1712   case MAT_SPD:
1713     A->spd_set = PETSC_TRUE;
1714     A->spd     = flg;
1715     if (flg) {
1716       A->symmetric                  = PETSC_TRUE;
1717       A->structurally_symmetric     = PETSC_TRUE;
1718       A->symmetric_set              = PETSC_TRUE;
1719       A->structurally_symmetric_set = PETSC_TRUE;
1720     }
1721     break;
1722   case MAT_SYMMETRIC:
1723     MatCheckPreallocated(A,1);
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_STRUCTURALLY_SYMMETRIC:
1727     MatCheckPreallocated(A,1);
1728     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1729     break;
1730   case MAT_SYMMETRY_ETERNAL:
1731     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1732     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1733     break;
1734   case MAT_IGNORE_LOWER_TRIANGULAR:
1735     aA->ignore_ltriangular = flg;
1736     break;
1737   case MAT_ERROR_LOWER_TRIANGULAR:
1738     aA->ignore_ltriangular = flg;
1739     break;
1740   case MAT_GETROW_UPPERTRIANGULAR:
1741     aA->getrow_utriangular = flg;
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1750 {
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   if (reuse == MAT_INITIAL_MATRIX) {
1755     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1756   }  else if (reuse == MAT_REUSE_MATRIX) {
1757     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1758   }
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1763 {
1764   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1765   Mat            a     = baij->A, b=baij->B;
1766   PetscErrorCode ierr;
1767   PetscInt       nv,m,n;
1768   PetscBool      flg;
1769 
1770   PetscFunctionBegin;
1771   if (ll != rr) {
1772     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1773     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1774   }
1775   if (!ll) PetscFunctionReturn(0);
1776 
1777   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1778   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1779 
1780   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1781   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1782 
1783   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784 
1785   /* left diagonalscale the off-diagonal part */
1786   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1787 
1788   /* scale the diagonal part */
1789   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1790 
1791   /* right diagonalscale the off-diagonal part */
1792   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1793   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1798 {
1799   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1800   PetscErrorCode ierr;
1801 
1802   PetscFunctionBegin;
1803   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1808 
1809 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1810 {
1811   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1812   Mat            a,b,c,d;
1813   PetscBool      flg;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   a = matA->A; b = matA->B;
1818   c = matB->A; d = matB->B;
1819 
1820   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1821   if (flg) {
1822     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1823   }
1824   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1829 {
1830   PetscErrorCode ierr;
1831   PetscBool      isbaij;
1832 
1833   PetscFunctionBegin;
1834   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1835   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1836   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1837   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1838     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1839     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1840     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1841   } else {
1842     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1843     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1844 
1845     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1846     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1847   }
1848   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1853 {
1854   PetscErrorCode ierr;
1855 
1856   PetscFunctionBegin;
1857   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1862 {
1863   PetscErrorCode ierr;
1864   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1865   PetscBLASInt   bnz,one=1;
1866   Mat_SeqSBAIJ   *xa,*ya;
1867   Mat_SeqBAIJ    *xb,*yb;
1868 
1869   PetscFunctionBegin;
1870   if (str == SAME_NONZERO_PATTERN) {
1871     PetscScalar alpha = a;
1872     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1873     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1874     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1875     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1876     xb   = (Mat_SeqBAIJ*)xx->B->data;
1877     yb   = (Mat_SeqBAIJ*)yy->B->data;
1878     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1879     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1880     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1881   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1882     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1883     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1884     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1885   } else {
1886     Mat      B;
1887     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1888     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1889     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1890     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1891     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1892     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1893     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1894     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1895     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1896     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1897     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1898     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1899     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1900     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1901     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1902     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1903     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1904     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1905     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1906     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1907   }
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1912 {
1913   PetscErrorCode ierr;
1914   PetscInt       i;
1915   PetscBool      flg;
1916 
1917   PetscFunctionBegin;
1918   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1919   for (i=0; i<n; i++) {
1920     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1921     if (!flg) {
1922       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1923     }
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1929 {
1930   PetscErrorCode ierr;
1931   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1932   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1933 
1934   PetscFunctionBegin;
1935   if (!Y->preallocated) {
1936     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1937   } else if (!aij->nz) {
1938     PetscInt nonew = aij->nonew;
1939     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1940     aij->nonew = nonew;
1941   }
1942   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1947 {
1948   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1949   PetscErrorCode ierr;
1950 
1951   PetscFunctionBegin;
1952   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1953   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1954   if (d) {
1955     PetscInt rstart;
1956     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1957     *d += rstart/A->rmap->bs;
1958 
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1964 {
1965   PetscFunctionBegin;
1966   *a = ((Mat_MPISBAIJ*)A->data)->A;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 /* -------------------------------------------------------------------*/
1971 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1972                                        MatGetRow_MPISBAIJ,
1973                                        MatRestoreRow_MPISBAIJ,
1974                                        MatMult_MPISBAIJ,
1975                                /*  4*/ MatMultAdd_MPISBAIJ,
1976                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1977                                        MatMultAdd_MPISBAIJ,
1978                                        0,
1979                                        0,
1980                                        0,
1981                                /* 10*/ 0,
1982                                        0,
1983                                        0,
1984                                        MatSOR_MPISBAIJ,
1985                                        MatTranspose_MPISBAIJ,
1986                                /* 15*/ MatGetInfo_MPISBAIJ,
1987                                        MatEqual_MPISBAIJ,
1988                                        MatGetDiagonal_MPISBAIJ,
1989                                        MatDiagonalScale_MPISBAIJ,
1990                                        MatNorm_MPISBAIJ,
1991                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1992                                        MatAssemblyEnd_MPISBAIJ,
1993                                        MatSetOption_MPISBAIJ,
1994                                        MatZeroEntries_MPISBAIJ,
1995                                /* 24*/ 0,
1996                                        0,
1997                                        0,
1998                                        0,
1999                                        0,
2000                                /* 29*/ MatSetUp_MPISBAIJ,
2001                                        0,
2002                                        0,
2003                                        MatGetDiagonalBlock_MPISBAIJ,
2004                                        0,
2005                                /* 34*/ MatDuplicate_MPISBAIJ,
2006                                        0,
2007                                        0,
2008                                        0,
2009                                        0,
2010                                /* 39*/ MatAXPY_MPISBAIJ,
2011                                        MatCreateSubMatrices_MPISBAIJ,
2012                                        MatIncreaseOverlap_MPISBAIJ,
2013                                        MatGetValues_MPISBAIJ,
2014                                        MatCopy_MPISBAIJ,
2015                                /* 44*/ 0,
2016                                        MatScale_MPISBAIJ,
2017                                        MatShift_MPISBAIJ,
2018                                        0,
2019                                        0,
2020                                /* 49*/ 0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                        0,
2025                                /* 54*/ 0,
2026                                        0,
2027                                        MatSetUnfactored_MPISBAIJ,
2028                                        0,
2029                                        MatSetValuesBlocked_MPISBAIJ,
2030                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2031                                        0,
2032                                        0,
2033                                        0,
2034                                        0,
2035                                /* 64*/ 0,
2036                                        0,
2037                                        0,
2038                                        0,
2039                                        0,
2040                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2041                                        0,
2042                                        0,
2043                                        0,
2044                                        0,
2045                                /* 74*/ 0,
2046                                        0,
2047                                        0,
2048                                        0,
2049                                        0,
2050                                /* 79*/ 0,
2051                                        0,
2052                                        0,
2053                                        0,
2054                                        MatLoad_MPISBAIJ,
2055                                /* 84*/ 0,
2056                                        0,
2057                                        0,
2058                                        0,
2059                                        0,
2060                                /* 89*/ 0,
2061                                        0,
2062                                        0,
2063                                        0,
2064                                        0,
2065                                /* 94*/ 0,
2066                                        0,
2067                                        0,
2068                                        0,
2069                                        0,
2070                                /* 99*/ 0,
2071                                        0,
2072                                        0,
2073                                        0,
2074                                        0,
2075                                /*104*/ 0,
2076                                        MatRealPart_MPISBAIJ,
2077                                        MatImaginaryPart_MPISBAIJ,
2078                                        MatGetRowUpperTriangular_MPISBAIJ,
2079                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2080                                /*109*/ 0,
2081                                        0,
2082                                        0,
2083                                        0,
2084                                        MatMissingDiagonal_MPISBAIJ,
2085                                /*114*/ 0,
2086                                        0,
2087                                        0,
2088                                        0,
2089                                        0,
2090                                /*119*/ 0,
2091                                        0,
2092                                        0,
2093                                        0,
2094                                        0,
2095                                /*124*/ 0,
2096                                        0,
2097                                        0,
2098                                        0,
2099                                        0,
2100                                /*129*/ 0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                        0,
2105                                /*134*/ 0,
2106                                        0,
2107                                        0,
2108                                        0,
2109                                        0,
2110                                /*139*/ MatSetBlockSizes_Default,
2111                                        0,
2112                                        0,
2113                                        0,
2114                                        0,
2115                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2116 };
2117 
2118 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2119 {
2120   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
2121   PetscErrorCode ierr;
2122   PetscInt       i,mbs,Mbs;
2123   PetscMPIInt    size;
2124 
2125   PetscFunctionBegin;
2126   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2127   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2128   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2129   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2130   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
2131   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
2132 
2133   mbs = B->rmap->n/bs;
2134   Mbs = B->rmap->N/bs;
2135   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2136 
2137   B->rmap->bs = bs;
2138   b->bs2      = bs*bs;
2139   b->mbs      = mbs;
2140   b->Mbs      = Mbs;
2141   b->nbs      = B->cmap->n/bs;
2142   b->Nbs      = B->cmap->N/bs;
2143 
2144   for (i=0; i<=b->size; i++) {
2145     b->rangebs[i] = B->rmap->range[i]/bs;
2146   }
2147   b->rstartbs = B->rmap->rstart/bs;
2148   b->rendbs   = B->rmap->rend/bs;
2149 
2150   b->cstartbs = B->cmap->rstart/bs;
2151   b->cendbs   = B->cmap->rend/bs;
2152 
2153 #if defined(PETSC_USE_CTABLE)
2154   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2155 #else
2156   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2157 #endif
2158   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2159   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2160   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2161   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2162   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2163   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2164   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2165   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2166   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2167 
2168   /* Because the B will have been resized we simply destroy it and create a new one each time */
2169   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2170   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2171   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2172   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2173   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2175 
2176   if (!B->preallocated) {
2177     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2178     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2179     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2180     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2181     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2182   }
2183 
2184   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2185   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2186 
2187   B->preallocated  = PETSC_TRUE;
2188   B->was_assembled = PETSC_FALSE;
2189   B->assembled     = PETSC_FALSE;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2194 {
2195   PetscInt       m,rstart,cend;
2196   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2197   const PetscInt *JJ    =0;
2198   PetscScalar    *values=0;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2203   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2204   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2205   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2206   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2207   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2208   m      = B->rmap->n/bs;
2209   rstart = B->rmap->rstart/bs;
2210   cend   = B->cmap->rend/bs;
2211 
2212   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2213   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2214   for (i=0; i<m; i++) {
2215     nz = ii[i+1] - ii[i];
2216     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2217     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2218     JJ     = jj + ii[i];
2219     bd     = 0;
2220     for (j=0; j<nz; j++) {
2221       if (*JJ >= i + rstart) break;
2222       JJ++;
2223       bd++;
2224     }
2225     d  = 0;
2226     for (; j<nz; j++) {
2227       if (*JJ++ >= cend) break;
2228       d++;
2229     }
2230     d_nnz[i] = d;
2231     o_nnz[i] = nz - d - bd;
2232     nz       = nz - bd;
2233     nz_max = PetscMax(nz_max,nz);
2234   }
2235   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2236   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2237 
2238   values = (PetscScalar*)V;
2239   if (!values) {
2240     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2241   }
2242   for (i=0; i<m; i++) {
2243     PetscInt          row    = i + rstart;
2244     PetscInt          ncols  = ii[i+1] - ii[i];
2245     const PetscInt    *icols = jj + ii[i];
2246     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2247     if (bs == 1) {
2248       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2249     } else {
2250       for (j=0; j<ncols; j++) {
2251         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2252         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2253       }
2254     }
2255   }
2256 
2257   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2258   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2259   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2260   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 /*MC
2265    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2266    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2267    the matrix is stored.
2268 
2269    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2270    can call MatSetOption(Mat, MAT_HERMITIAN);
2271 
2272    Options Database Keys:
2273 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2274 
2275    Notes:
2276      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2277      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2278 
2279    Level: beginner
2280 
2281 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType
2282 M*/
2283 
2284 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2285 {
2286   Mat_MPISBAIJ   *b;
2287   PetscErrorCode ierr;
2288   PetscBool      flg = PETSC_FALSE;
2289 
2290   PetscFunctionBegin;
2291   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2292   B->data = (void*)b;
2293   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2294 
2295   B->ops->destroy = MatDestroy_MPISBAIJ;
2296   B->ops->view    = MatView_MPISBAIJ;
2297   B->assembled    = PETSC_FALSE;
2298   B->insertmode   = NOT_SET_VALUES;
2299 
2300   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2301   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2302 
2303   /* build local table of row and column ownerships */
2304   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2305 
2306   /* build cache for off array entries formed */
2307   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2308 
2309   b->donotstash  = PETSC_FALSE;
2310   b->colmap      = NULL;
2311   b->garray      = NULL;
2312   b->roworiented = PETSC_TRUE;
2313 
2314   /* stuff used in block assembly */
2315   b->barray = 0;
2316 
2317   /* stuff used for matrix vector multiply */
2318   b->lvec    = 0;
2319   b->Mvctx   = 0;
2320   b->slvec0  = 0;
2321   b->slvec0b = 0;
2322   b->slvec1  = 0;
2323   b->slvec1a = 0;
2324   b->slvec1b = 0;
2325   b->sMvctx  = 0;
2326 
2327   /* stuff for MatGetRow() */
2328   b->rowindices   = 0;
2329   b->rowvalues    = 0;
2330   b->getrowactive = PETSC_FALSE;
2331 
2332   /* hash table stuff */
2333   b->ht           = 0;
2334   b->hd           = 0;
2335   b->ht_size      = 0;
2336   b->ht_flag      = PETSC_FALSE;
2337   b->ht_fact      = 0;
2338   b->ht_total_ct  = 0;
2339   b->ht_insert_ct = 0;
2340 
2341   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2342   b->ijonly = PETSC_FALSE;
2343 
2344   b->in_loc = 0;
2345   b->v_loc  = 0;
2346   b->n_loc  = 0;
2347 
2348   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2350   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2351   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2352 #if defined(PETSC_HAVE_ELEMENTAL)
2353   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2354 #endif
2355   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2356   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2357 
2358   B->symmetric                  = PETSC_TRUE;
2359   B->structurally_symmetric     = PETSC_TRUE;
2360   B->symmetric_set              = PETSC_TRUE;
2361   B->structurally_symmetric_set = PETSC_TRUE;
2362   B->symmetric_eternal          = PETSC_TRUE;
2363 
2364   B->hermitian                  = PETSC_FALSE;
2365   B->hermitian_set              = PETSC_FALSE;
2366 
2367   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2368   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2369   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2370   if (flg) {
2371     PetscReal fact = 1.39;
2372     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2373     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2374     if (fact <= 1.0) fact = 1.39;
2375     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2376     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2377   }
2378   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 /*MC
2383    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2384 
2385    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2386    and MATMPISBAIJ otherwise.
2387 
2388    Options Database Keys:
2389 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2390 
2391   Level: beginner
2392 
2393 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2394 M*/
2395 
2396 /*@C
2397    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2398    the user should preallocate the matrix storage by setting the parameters
2399    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2400    performance can be increased by more than a factor of 50.
2401 
2402    Collective on Mat
2403 
2404    Input Parameters:
2405 +  B - the matrix
2406 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2407           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2408 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2409            submatrix  (same for all local rows)
2410 .  d_nnz - array containing the number of block nonzeros in the various block rows
2411            in the upper triangular and diagonal part of the in diagonal portion of the local
2412            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2413            for the diagonal entry and set a value even if it is zero.
2414 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2415            submatrix (same for all local rows).
2416 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2417            off-diagonal portion of the local submatrix that is right of the diagonal
2418            (possibly different for each block row) or NULL.
2419 
2420 
2421    Options Database Keys:
2422 +   -mat_no_unroll - uses code that does not unroll the loops in the
2423                      block calculations (much slower)
2424 -   -mat_block_size - size of the blocks to use
2425 
2426    Notes:
2427 
2428    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2429    than it must be used on all processors that share the object for that argument.
2430 
2431    If the *_nnz parameter is given then the *_nz parameter is ignored
2432 
2433    Storage Information:
2434    For a square global matrix we define each processor's diagonal portion
2435    to be its local rows and the corresponding columns (a square submatrix);
2436    each processor's off-diagonal portion encompasses the remainder of the
2437    local matrix (a rectangular submatrix).
2438 
2439    The user can specify preallocated storage for the diagonal part of
2440    the local submatrix with either d_nz or d_nnz (not both).  Set
2441    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2442    memory allocation.  Likewise, specify preallocated storage for the
2443    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2444 
2445    You can call MatGetInfo() to get information on how effective the preallocation was;
2446    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2447    You can also run with the option -info and look for messages with the string
2448    malloc in them to see if additional memory allocation was needed.
2449 
2450    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2451    the figure below we depict these three local rows and all columns (0-11).
2452 
2453 .vb
2454            0 1 2 3 4 5 6 7 8 9 10 11
2455           --------------------------
2456    row 3  |. . . d d d o o o o  o  o
2457    row 4  |. . . d d d o o o o  o  o
2458    row 5  |. . . d d d o o o o  o  o
2459           --------------------------
2460 .ve
2461 
2462    Thus, any entries in the d locations are stored in the d (diagonal)
2463    submatrix, and any entries in the o locations are stored in the
2464    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2465    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2466 
2467    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2468    plus the diagonal part of the d matrix,
2469    and o_nz should indicate the number of block nonzeros per row in the o matrix
2470 
2471    In general, for PDE problems in which most nonzeros are near the diagonal,
2472    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2473    or you will get TERRIBLE performance; see the users' manual chapter on
2474    matrices.
2475 
2476    Level: intermediate
2477 
2478 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2479 @*/
2480 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2481 {
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2486   PetscValidType(B,1);
2487   PetscValidLogicalCollectiveInt(B,bs,2);
2488   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 /*@C
2493    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2494    (block compressed row).  For good matrix assembly performance
2495    the user should preallocate the matrix storage by setting the parameters
2496    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2497    performance can be increased by more than a factor of 50.
2498 
2499    Collective
2500 
2501    Input Parameters:
2502 +  comm - MPI communicator
2503 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2504           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2505 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2506            This value should be the same as the local size used in creating the
2507            y vector for the matrix-vector product y = Ax.
2508 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2509            This value should be the same as the local size used in creating the
2510            x vector for the matrix-vector product y = Ax.
2511 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2512 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2513 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2514            submatrix  (same for all local rows)
2515 .  d_nnz - array containing the number of block nonzeros in the various block rows
2516            in the upper triangular portion of the in diagonal portion of the local
2517            (possibly different for each block block row) or NULL.
2518            If you plan to factor the matrix you must leave room for the diagonal entry and
2519            set its value even if it is zero.
2520 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2521            submatrix (same for all local rows).
2522 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2523            off-diagonal portion of the local submatrix (possibly different for
2524            each block row) or NULL.
2525 
2526    Output Parameter:
2527 .  A - the matrix
2528 
2529    Options Database Keys:
2530 +   -mat_no_unroll - uses code that does not unroll the loops in the
2531                      block calculations (much slower)
2532 .   -mat_block_size - size of the blocks to use
2533 -   -mat_mpi - use the parallel matrix data structures even on one processor
2534                (defaults to using SeqBAIJ format on one processor)
2535 
2536    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2537    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2538    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2539 
2540    Notes:
2541    The number of rows and columns must be divisible by blocksize.
2542    This matrix type does not support complex Hermitian operation.
2543 
2544    The user MUST specify either the local or global matrix dimensions
2545    (possibly both).
2546 
2547    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2548    than it must be used on all processors that share the object for that argument.
2549 
2550    If the *_nnz parameter is given then the *_nz parameter is ignored
2551 
2552    Storage Information:
2553    For a square global matrix we define each processor's diagonal portion
2554    to be its local rows and the corresponding columns (a square submatrix);
2555    each processor's off-diagonal portion encompasses the remainder of the
2556    local matrix (a rectangular submatrix).
2557 
2558    The user can specify preallocated storage for the diagonal part of
2559    the local submatrix with either d_nz or d_nnz (not both).  Set
2560    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2561    memory allocation.  Likewise, specify preallocated storage for the
2562    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2563 
2564    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2565    the figure below we depict these three local rows and all columns (0-11).
2566 
2567 .vb
2568            0 1 2 3 4 5 6 7 8 9 10 11
2569           --------------------------
2570    row 3  |. . . d d d o o o o  o  o
2571    row 4  |. . . d d d o o o o  o  o
2572    row 5  |. . . d d d o o o o  o  o
2573           --------------------------
2574 .ve
2575 
2576    Thus, any entries in the d locations are stored in the d (diagonal)
2577    submatrix, and any entries in the o locations are stored in the
2578    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2579    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2580 
2581    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2582    plus the diagonal part of the d matrix,
2583    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2584    In general, for PDE problems in which most nonzeros are near the diagonal,
2585    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2586    or you will get TERRIBLE performance; see the users' manual chapter on
2587    matrices.
2588 
2589    Level: intermediate
2590 
2591 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2592 @*/
2593 
2594 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2595 {
2596   PetscErrorCode ierr;
2597   PetscMPIInt    size;
2598 
2599   PetscFunctionBegin;
2600   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2601   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2603   if (size > 1) {
2604     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2605     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2606   } else {
2607     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2608     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2609   }
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 
2614 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2615 {
2616   Mat            mat;
2617   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2618   PetscErrorCode ierr;
2619   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2620   PetscScalar    *array;
2621 
2622   PetscFunctionBegin;
2623   *newmat = 0;
2624 
2625   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2626   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2627   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2628   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2629   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2630 
2631   mat->factortype   = matin->factortype;
2632   mat->preallocated = PETSC_TRUE;
2633   mat->assembled    = PETSC_TRUE;
2634   mat->insertmode   = NOT_SET_VALUES;
2635 
2636   a      = (Mat_MPISBAIJ*)mat->data;
2637   a->bs2 = oldmat->bs2;
2638   a->mbs = oldmat->mbs;
2639   a->nbs = oldmat->nbs;
2640   a->Mbs = oldmat->Mbs;
2641   a->Nbs = oldmat->Nbs;
2642 
2643   a->size         = oldmat->size;
2644   a->rank         = oldmat->rank;
2645   a->donotstash   = oldmat->donotstash;
2646   a->roworiented  = oldmat->roworiented;
2647   a->rowindices   = 0;
2648   a->rowvalues    = 0;
2649   a->getrowactive = PETSC_FALSE;
2650   a->barray       = 0;
2651   a->rstartbs     = oldmat->rstartbs;
2652   a->rendbs       = oldmat->rendbs;
2653   a->cstartbs     = oldmat->cstartbs;
2654   a->cendbs       = oldmat->cendbs;
2655 
2656   /* hash table stuff */
2657   a->ht           = 0;
2658   a->hd           = 0;
2659   a->ht_size      = 0;
2660   a->ht_flag      = oldmat->ht_flag;
2661   a->ht_fact      = oldmat->ht_fact;
2662   a->ht_total_ct  = 0;
2663   a->ht_insert_ct = 0;
2664 
2665   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2666   if (oldmat->colmap) {
2667 #if defined(PETSC_USE_CTABLE)
2668     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2669 #else
2670     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2671     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2672     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2673 #endif
2674   } else a->colmap = 0;
2675 
2676   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2677     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2678     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2679     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2680   } else a->garray = 0;
2681 
2682   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2683   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2684   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2685   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2686   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2687 
2688   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2689   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2690   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2691   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2692 
2693   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2694   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2695   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2696   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2697   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2698   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2699   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2700   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2701   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2702   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2703   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2704   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2705   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2706 
2707   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2708   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2709   a->sMvctx = oldmat->sMvctx;
2710   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2711 
2712   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2713   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2714   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2715   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2716   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2717   *newmat = mat;
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2722 {
2723   PetscErrorCode ierr;
2724   PetscInt       i,nz,j,rstart,rend;
2725   PetscScalar    *vals,*buf;
2726   MPI_Comm       comm;
2727   MPI_Status     status;
2728   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2729   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2730   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2731   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2732   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2733   PetscInt       dcount,kmax,k,nzcount,tmp;
2734   int            fd;
2735   PetscBool      isbinary;
2736 
2737   PetscFunctionBegin;
2738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2739   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2740 
2741   /* force binary viewer to load .info file if it has not yet done so */
2742   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2743   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2744   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2745   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2746   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2747   if (bs < 0) bs = 1;
2748 
2749   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2750   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2751   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2752   if (!rank) {
2753     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2754     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2755     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2756   }
2757 
2758   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2759   M    = header[1];
2760   N    = header[2];
2761 
2762   /* If global sizes are set, check if they are consistent with that given in the file */
2763   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2764   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2765 
2766   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2767 
2768   /*
2769      This code adds extra rows to make sure the number of rows is
2770      divisible by the blocksize
2771   */
2772   Mbs        = M/bs;
2773   extra_rows = bs - M + bs*(Mbs);
2774   if (extra_rows == bs) extra_rows = 0;
2775   else                  Mbs++;
2776   if (extra_rows &&!rank) {
2777     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2778   }
2779 
2780   /* determine ownership of all rows */
2781   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2782     mbs = Mbs/size + ((Mbs % size) > rank);
2783     m   = mbs*bs;
2784   } else { /* User Set */
2785     m   = newmat->rmap->n;
2786     mbs = m/bs;
2787   }
2788   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2789   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2790   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2791   rowners[0] = 0;
2792   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2793   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2794   rstart = rowners[rank];
2795   rend   = rowners[rank+1];
2796 
2797   /* distribute row lengths to all processors */
2798   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2801     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
2802     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2803     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2804     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2805     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2806     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2807   } else {
2808     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2809   }
2810 
2811   if (!rank) {   /* procs[0] */
2812     /* calculate the number of nonzeros on each processor */
2813     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2814     for (i=0; i<size; i++) {
2815       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2816         procsnz[i] += rowlengths[j];
2817       }
2818     }
2819     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2820 
2821     /* determine max buffer needed and allocate it */
2822     maxnz = 0;
2823     for (i=0; i<size; i++) {
2824       maxnz = PetscMax(maxnz,procsnz[i]);
2825     }
2826     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2827 
2828     /* read in my part of the matrix column indices  */
2829     nz     = procsnz[0];
2830     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2831     mycols = ibuf;
2832     if (size == 1) nz -= extra_rows;
2833     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2834     if (size == 1) {
2835       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2836     }
2837 
2838     /* read in every ones (except the last) and ship off */
2839     for (i=1; i<size-1; i++) {
2840       nz   = procsnz[i];
2841       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2842       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2843     }
2844     /* read in the stuff for the last proc */
2845     if (size != 1) {
2846       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2847       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2848       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2849       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2850     }
2851     ierr = PetscFree(cols);CHKERRQ(ierr);
2852   } else {  /* procs[i], i>0 */
2853     /* determine buffer space needed for message */
2854     nz = 0;
2855     for (i=0; i<m; i++) nz += locrowlens[i];
2856     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2857     mycols = ibuf;
2858     /* receive message of column indices*/
2859     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2860     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2861     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2862   }
2863 
2864   /* loop over local rows, determining number of off diagonal entries */
2865   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2866   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2867   rowcount = 0;
2868   nzcount  = 0;
2869   for (i=0; i<mbs; i++) {
2870     dcount  = 0;
2871     odcount = 0;
2872     for (j=0; j<bs; j++) {
2873       kmax = locrowlens[rowcount];
2874       for (k=0; k<kmax; k++) {
2875         tmp = mycols[nzcount++]/bs; /* block col. index */
2876         if (!mask[tmp]) {
2877           mask[tmp] = 1;
2878           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2879           else masked1[dcount++] = tmp; /* entry in diag portion */
2880         }
2881       }
2882       rowcount++;
2883     }
2884 
2885     dlens[i]  = dcount;  /* d_nzz[i] */
2886     odlens[i] = odcount; /* o_nzz[i] */
2887 
2888     /* zero out the mask elements we set */
2889     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2890     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2891   }
2892   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2893   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2894   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2895 
2896   if (!rank) {
2897     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2898     /* read in my part of the matrix numerical values  */
2899     nz     = procsnz[0];
2900     vals   = buf;
2901     mycols = ibuf;
2902     if (size == 1) nz -= extra_rows;
2903     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2904     if (size == 1) {
2905       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2906     }
2907 
2908     /* insert into matrix */
2909     jj = rstart*bs;
2910     for (i=0; i<m; i++) {
2911       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2912       mycols += locrowlens[i];
2913       vals   += locrowlens[i];
2914       jj++;
2915     }
2916 
2917     /* read in other processors (except the last one) and ship out */
2918     for (i=1; i<size-1; i++) {
2919       nz   = procsnz[i];
2920       vals = buf;
2921       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2922       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2923     }
2924     /* the last proc */
2925     if (size != 1) {
2926       nz   = procsnz[i] - extra_rows;
2927       vals = buf;
2928       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2929       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2930       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2931     }
2932     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2933 
2934   } else {
2935     /* receive numeric values */
2936     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2937 
2938     /* receive message of values*/
2939     vals   = buf;
2940     mycols = ibuf;
2941     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2942     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2943     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2944 
2945     /* insert into matrix */
2946     jj = rstart*bs;
2947     for (i=0; i<m; i++) {
2948       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2949       mycols += locrowlens[i];
2950       vals   += locrowlens[i];
2951       jj++;
2952     }
2953   }
2954 
2955   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2956   ierr = PetscFree(buf);CHKERRQ(ierr);
2957   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2958   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2959   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2960   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2961   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 /*XXXXX@
2967    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2968 
2969    Input Parameters:
2970 .  mat  - the matrix
2971 .  fact - factor
2972 
2973    Not Collective on Mat, each process can have a different hash factor
2974 
2975    Level: advanced
2976 
2977   Notes:
2978    This can also be set by the command line option: -mat_use_hash_table fact
2979 
2980 .seealso: MatSetOption()
2981 @XXXXX*/
2982 
2983 
2984 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2985 {
2986   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2987   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2988   PetscReal      atmp;
2989   PetscReal      *work,*svalues,*rvalues;
2990   PetscErrorCode ierr;
2991   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2992   PetscMPIInt    rank,size;
2993   PetscInt       *rowners_bs,dest,count,source;
2994   PetscScalar    *va;
2995   MatScalar      *ba;
2996   MPI_Status     stat;
2997 
2998   PetscFunctionBegin;
2999   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3000   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
3001   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3002 
3003   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3004   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3005 
3006   bs  = A->rmap->bs;
3007   mbs = a->mbs;
3008   Mbs = a->Mbs;
3009   ba  = b->a;
3010   bi  = b->i;
3011   bj  = b->j;
3012 
3013   /* find ownerships */
3014   rowners_bs = A->rmap->range;
3015 
3016   /* each proc creates an array to be distributed */
3017   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3018 
3019   /* row_max for B */
3020   if (rank != size-1) {
3021     for (i=0; i<mbs; i++) {
3022       ncols = bi[1] - bi[0]; bi++;
3023       brow  = bs*i;
3024       for (j=0; j<ncols; j++) {
3025         bcol = bs*(*bj);
3026         for (kcol=0; kcol<bs; kcol++) {
3027           col  = bcol + kcol;                /* local col index */
3028           col += rowners_bs[rank+1];      /* global col index */
3029           for (krow=0; krow<bs; krow++) {
3030             atmp = PetscAbsScalar(*ba); ba++;
3031             row  = brow + krow;   /* local row index */
3032             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3033             if (work[col] < atmp) work[col] = atmp;
3034           }
3035         }
3036         bj++;
3037       }
3038     }
3039 
3040     /* send values to its owners */
3041     for (dest=rank+1; dest<size; dest++) {
3042       svalues = work + rowners_bs[dest];
3043       count   = rowners_bs[dest+1]-rowners_bs[dest];
3044       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3045     }
3046   }
3047 
3048   /* receive values */
3049   if (rank) {
3050     rvalues = work;
3051     count   = rowners_bs[rank+1]-rowners_bs[rank];
3052     for (source=0; source<rank; source++) {
3053       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3054       /* process values */
3055       for (i=0; i<count; i++) {
3056         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3057       }
3058     }
3059   }
3060 
3061   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3062   ierr = PetscFree(work);CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3067 {
3068   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3069   PetscErrorCode    ierr;
3070   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3071   PetscScalar       *x,*ptr,*from;
3072   Vec               bb1;
3073   const PetscScalar *b;
3074 
3075   PetscFunctionBegin;
3076   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3077   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3078 
3079   if (flag == SOR_APPLY_UPPER) {
3080     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3081     PetscFunctionReturn(0);
3082   }
3083 
3084   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3085     if (flag & SOR_ZERO_INITIAL_GUESS) {
3086       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3087       its--;
3088     }
3089 
3090     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3091     while (its--) {
3092 
3093       /* lower triangular part: slvec0b = - B^T*xx */
3094       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3095 
3096       /* copy xx into slvec0a */
3097       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3098       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3099       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
3100       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3101 
3102       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3103 
3104       /* copy bb into slvec1a */
3105       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3106       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3107       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
3108       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3109 
3110       /* set slvec1b = 0 */
3111       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3112 
3113       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3114       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3115       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3116       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3117 
3118       /* upper triangular part: bb1 = bb1 - B*x */
3119       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3120 
3121       /* local diagonal sweep */
3122       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3123     }
3124     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3125   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3126     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3127   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3128     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3129   } else if (flag & SOR_EISENSTAT) {
3130     Vec               xx1;
3131     PetscBool         hasop;
3132     const PetscScalar *diag;
3133     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3134     PetscInt          i,n;
3135 
3136     if (!mat->xx1) {
3137       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3138       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3139     }
3140     xx1 = mat->xx1;
3141     bb1 = mat->bb1;
3142 
3143     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3144 
3145     if (!mat->diag) {
3146       /* this is wrong for same matrix with new nonzero values */
3147       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3148       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3149     }
3150     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3151 
3152     if (hasop) {
3153       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3154       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3155     } else {
3156       /*
3157           These two lines are replaced by code that may be a bit faster for a good compiler
3158       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3159       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3160       */
3161       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3162       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3163       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3164       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3165       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3166       if (omega == 1.0) {
3167         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3168         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3169       } else {
3170         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3171         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3172       }
3173       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3174       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3175       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3176       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3177     }
3178 
3179     /* multiply off-diagonal portion of matrix */
3180     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3181     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3182     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3183     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3184     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
3185     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3186     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3187     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3188     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3189     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3190 
3191     /* local sweep */
3192     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3193     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3194   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3195   PetscFunctionReturn(0);
3196 }
3197 
3198 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3199 {
3200   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3201   PetscErrorCode ierr;
3202   Vec            lvec1,bb1;
3203 
3204   PetscFunctionBegin;
3205   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3206   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3207 
3208   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3209     if (flag & SOR_ZERO_INITIAL_GUESS) {
3210       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3211       its--;
3212     }
3213 
3214     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3215     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3216     while (its--) {
3217       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3218 
3219       /* lower diagonal part: bb1 = bb - B^T*xx */
3220       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3221       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3222 
3223       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3224       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3225       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3226 
3227       /* upper diagonal part: bb1 = bb1 - B*x */
3228       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3229       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3230 
3231       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3232 
3233       /* diagonal sweep */
3234       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3235     }
3236     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3237     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3238   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 /*@
3243      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3244          CSR format the local rows.
3245 
3246    Collective
3247 
3248    Input Parameters:
3249 +  comm - MPI communicator
3250 .  bs - the block size, only a block size of 1 is supported
3251 .  m - number of local rows (Cannot be PETSC_DECIDE)
3252 .  n - This value should be the same as the local size used in creating the
3253        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3254        calculated if N is given) For square matrices n is almost always m.
3255 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3256 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3257 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
3258 .   j - column indices
3259 -   a - matrix values
3260 
3261    Output Parameter:
3262 .   mat - the matrix
3263 
3264    Level: intermediate
3265 
3266    Notes:
3267        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3268      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3269      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3270 
3271        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3272 
3273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3274           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3275 @*/
3276 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3277 {
3278   PetscErrorCode ierr;
3279 
3280 
3281   PetscFunctionBegin;
3282   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3283   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3284   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3285   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3286   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3287   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3288   PetscFunctionReturn(0);
3289 }
3290 
3291 
3292 /*@C
3293    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
3294 
3295    Collective
3296 
3297    Input Parameters:
3298 +  B - the matrix
3299 .  bs - the block size
3300 .  i - the indices into j for the start of each local row (starts with zero)
3301 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3302 -  v - optional values in the matrix
3303 
3304    Level: advanced
3305 
3306    Notes:
3307    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
3308    and usually the numerical values as well
3309 
3310    Any entries below the diagonal are ignored
3311 
3312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3313 @*/
3314 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3315 {
3316   PetscErrorCode ierr;
3317 
3318   PetscFunctionBegin;
3319   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3324 {
3325   PetscErrorCode ierr;
3326   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3327   PetscInt       *indx;
3328   PetscScalar    *values;
3329 
3330   PetscFunctionBegin;
3331   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3332   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3333     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3334     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3335     PetscInt       *bindx,rmax=a->rmax,j;
3336     PetscMPIInt    rank,size;
3337 
3338     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3339     mbs = m/bs; Nbs = N/cbs;
3340     if (n == PETSC_DECIDE) {
3341       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3342     }
3343     nbs = n/cbs;
3344 
3345     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3346     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3347 
3348     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3349     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3350     if (rank == size-1) {
3351       /* Check sum(nbs) = Nbs */
3352       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3353     }
3354 
3355     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3356     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3357     for (i=0; i<mbs; i++) {
3358       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3359       nnz  = nnz/bs;
3360       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3361       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3362       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3363     }
3364     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3365     ierr = PetscFree(bindx);CHKERRQ(ierr);
3366 
3367     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3368     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3369     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3370     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3371     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3372     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3373     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3374   }
3375 
3376   /* numeric phase */
3377   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3378   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3379 
3380   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3381   for (i=0; i<m; i++) {
3382     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3383     Ii   = i + rstart;
3384     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3385     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3386   }
3387   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3388   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3389   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392