xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4e4bbfaa3814cc83b5851d85be69070845f5653e)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
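
/*
   Usage sketch (an illustration, not part of this file): the two-pass
   MATPREALLOCATOR pattern implemented above can also be used directly from
   user code; all names except the PETSc calls are hypothetical.

     Mat pre;                                     and B, the matrix to be preallocated
     ierr = MatCreate(comm,&pre);CHKERRQ(ierr);
     ierr = MatSetType(pre,MATPREALLOCATOR);CHKERRQ(ierr);
     ierr = MatSetSizes(pre,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetUp(pre);CHKERRQ(ierr);
     ... insert the sparsity pattern with MatSetValues(pre,...) ...
     ierr = MatAssemblyBegin(pre,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(pre,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatPreallocatorPreallocate(pre,PETSC_TRUE,B);CHKERRQ(ierr);
     ierr = MatDestroy(&pre);CHKERRQ(ierr);
*/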
58 
59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
78     ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
79     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
80   } else B = *newmat;
81 
82   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
84     PetscInt          ncols;
85     const PetscInt    *row;
86     const PetscScalar *vals;
87 
88     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
89     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
90     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
91     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
92   }
93   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
94   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
95   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96 
97   if (reuse == MAT_INPLACE_MATRIX) {
98     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
99   } else {
100     *newmat = B;
101   }
102   PetscFunctionReturn(0);
103 }
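
/*
   Usage sketch (illustrative only): this routine is normally reached
   through the public MatConvert() interface, e.g.

     Mat C;
     ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);

   With MAT_INPLACE_MATRIX the converted matrix replaces A through
   MatHeaderReplace(), as in the code above.
*/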
104 
105 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
106 {
107   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
112   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
117 {
118   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
123   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
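
/*
   Usage sketch (illustrative only): MatStoreValues()/MatRetrieveValues()
   require a frozen nonzero structure, so a typical caller does

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);        save the current values
     ... overwrite values, e.g. to apply boundary conditions, and solve ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);     restore the saved values
*/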
126 
127 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
128   { \
129     brow = row/bs;  \
130     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
131     rmax = aimax[brow]; nrow = ailen[brow]; \
132     bcol = col/bs; \
133     ridx = row % bs; cidx = col % bs; \
134     low  = 0; high = nrow; \
135     while (high-low > 3) { \
136       t = (low+high)/2; \
137       if (rp[t] > bcol) high = t; \
138       else              low  = t; \
139     } \
140     for (_i=low; _i<high; _i++) { \
141       if (rp[_i] > bcol) break; \
142       if (rp[_i] == bcol) { \
143         bap = ap + bs2*_i + bs*cidx + ridx; \
144         if (addv == ADD_VALUES) *bap += value;  \
145         else                    *bap  = value;  \
146         goto a_noinsert; \
147       } \
148     } \
149     if (a->nonew == 1) goto a_noinsert; \
150     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into the matrix", orow, ocol); \
151     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
152     N = nrow++ - 1;  \
153     /* shift up all the later entries in this row */ \
154     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
155     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
156     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
157     rp[_i]                      = bcol;  \
158     ap[bs2*_i + bs*cidx + ridx] = value;  \
159     A->nonzerostate++;\
160 a_noinsert:; \
161     ailen[brow] = nrow; \
162   }
163 
164 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
165   { \
166     brow = row/bs;  \
167     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
168     rmax = bimax[brow]; nrow = bilen[brow]; \
169     bcol = col/bs; \
170     ridx = row % bs; cidx = col % bs; \
171     low  = 0; high = nrow; \
172     while (high-low > 3) { \
173       t = (low+high)/2; \
174       if (rp[t] > bcol) high = t; \
175       else              low  = t; \
176     } \
177     for (_i=low; _i<high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2*_i + bs*cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value;  \
182         else                    *bap  = value;  \
183         goto b_noinsert; \
184       } \
185     } \
186     if (b->nonew == 1) goto b_noinsert; \
187     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into the matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
189     N = nrow++ - 1;  \
190     /* shift up all the later entries in this row */ \
191     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
192     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
193     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
194     rp[_i]                      = bcol;  \
195     ap[bs2*_i + bs*cidx + ridx] = value;  \
196     B->nonzerostate++;\
197 b_noinsert:; \
198     bilen[brow] = nrow; \
199   }
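
/*
   Worked example of the index arithmetic in the two macros above, assuming
   bs = 3 (so bs2 = 9): a scalar entry at local (row,col) = (7,5) maps to
   block row brow = 7/3 = 2 and block column bcol = 5/3 = 1, with in-block
   offsets ridx = 7%3 = 1 and cidx = 5%3 = 2.  Blocks are stored
   column-major, so once the search locates the block _i with column index
   bcol, the scalar lives at ap[bs2*_i + bs*cidx + ridx] = ap[9*_i + 7].
*/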
200 
201 /* Only add/insert a(i,j) with i<=j (blocks).
202    Any a(i,j) with i>j input by the user is ignored or generates an error
203 */
204 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
205 {
206   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
207   MatScalar      value;
208   PetscBool      roworiented = baij->roworiented;
209   PetscErrorCode ierr;
210   PetscInt       i,j,row,col;
211   PetscInt       rstart_orig=mat->rmap->rstart;
212   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
213   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
214 
215   /* Some variables required by the MatSetValues_SeqSBAIJ_{A,B}_Private() macros */
216   Mat          A     = baij->A;
217   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
218   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
219   MatScalar    *aa   =a->a;
220 
221   Mat         B     = baij->B;
222   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
223   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
224   MatScalar   *ba   =b->a;
225 
226   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
227   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
228   MatScalar *ap,*bap;
229 
230   /* for stash */
231   PetscInt  n_loc, *in_loc = NULL;
232   MatScalar *v_loc = NULL;
233 
234   PetscFunctionBegin;
235   if (!baij->donotstash) {
236     if (n > baij->n_loc) {
237       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
238       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
239       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
240       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
241 
242       baij->n_loc = n;
243     }
244     in_loc = baij->in_loc;
245     v_loc  = baij->v_loc;
246   }
247 
248   for (i=0; i<m; i++) {
249     if (im[i] < 0) continue;
250 #if defined(PETSC_USE_DEBUG)
251     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
252 #endif
253     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
254       row = im[i] - rstart_orig;              /* local row index */
255       for (j=0; j<n; j++) {
256         if (im[i]/bs > in[j]/bs) {
257           if (a->ignore_ltriangular) {
258             continue;    /* ignore lower triangular blocks */
259           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
260         }
261         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
262           col  = in[j] - cstart_orig;         /* local col index */
263           brow = row/bs; bcol = col/bs;
264           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
265           if (roworiented) value = v[i*n+j];
266           else             value = v[i+j*m];
267           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
268           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
269         } else if (in[j] < 0) continue;
270 #if defined(PETSC_USE_DEBUG)
271         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
272 #endif
273         else {  /* off-diag entry (B) */
274           if (mat->was_assembled) {
275             if (!baij->colmap) {
276               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
277             }
278 #if defined(PETSC_USE_CTABLE)
279             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
280             col  = col - 1;
281 #else
282             col = baij->colmap[in[j]/bs] - 1;
283 #endif
284             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
285               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
286               col  =  in[j];
287               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
288               B    = baij->B;
289               b    = (Mat_SeqBAIJ*)(B)->data;
290               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
291               ba   = b->a;
292             } else col += in[j]%bs;
293           } else col = in[j];
294           if (roworiented) value = v[i*n+j];
295           else             value = v[i+j*m];
296           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
297           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
298         }
299       }
300     } else {  /* off processor entry */
301       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
302       if (!baij->donotstash) {
303         mat->assembled = PETSC_FALSE;
304         n_loc          = 0;
305         for (j=0; j<n; j++) {
306           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
307           in_loc[n_loc] = in[j];
308           if (roworiented) {
309             v_loc[n_loc] = v[i*n+j];
310           } else {
311             v_loc[n_loc] = v[j*m+i];
312           }
313           n_loc++;
314         }
315         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
316       }
317     }
318   }
319   PetscFunctionReturn(0);
320 }
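
/*
   Usage sketch (illustrative only, assuming bs = 1 for simplicity):
   callers must pass entries whose block row is <= the block column, or
   ask for lower-triangular input to be dropped.

     PetscInt    row = 3,col = 5;
     PetscScalar v   = 1.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);   upper: accepted
     row = 5; col = 3;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);   lower: silently dropped
*/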
321 
322 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
323 {
324   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
325   PetscErrorCode    ierr;
326   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
327   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
328   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
329   PetscBool         roworiented=a->roworiented;
330   const PetscScalar *value     = v;
331   MatScalar         *ap,*aa = a->a,*bap;
332 
333   PetscFunctionBegin;
334   if (col < row) {
335     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
336     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
337   }
338   rp   = aj + ai[row];
339   ap   = aa + bs2*ai[row];
340   rmax = imax[row];
341   nrow = ailen[row];
342   value = v;
343   low   = 0;
344   high  = nrow;
345 
346   while (high-low > 7) {
347     t = (low+high)/2;
348     if (rp[t] > col) high = t;
349     else             low  = t;
350   }
351   for (i=low; i<high; i++) {
352     if (rp[i] > col) break;
353     if (rp[i] == col) {
354       bap = ap +  bs2*i;
355       if (roworiented) {
356         if (is == ADD_VALUES) {
357           for (ii=0; ii<bs; ii++) {
358             for (jj=ii; jj<bs2; jj+=bs) {
359               bap[jj] += *value++;
360             }
361           }
362         } else {
363           for (ii=0; ii<bs; ii++) {
364             for (jj=ii; jj<bs2; jj+=bs) {
365               bap[jj] = *value++;
366             }
367           }
368         }
369       } else {
370         if (is == ADD_VALUES) {
371           for (ii=0; ii<bs; ii++) {
372             for (jj=0; jj<bs; jj++) {
373               *bap++ += *value++;
374             }
375           }
376         } else {
377           for (ii=0; ii<bs; ii++) {
378             for (jj=0; jj<bs; jj++) {
379               *bap++  = *value++;
380             }
381           }
382         }
383       }
384       goto noinsert2;
385     }
386   }
387   if (nonew == 1) goto noinsert2;
388   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block row/column (%D, %D) in the matrix", orow, ocol);
389   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
390   N = nrow++ - 1; high++;
391   /* shift up all the later entries in this row */
392   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
393   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
394   rp[i] = col;
395   bap   = ap +  bs2*i;
396   if (roworiented) {
397     for (ii=0; ii<bs; ii++) {
398       for (jj=ii; jj<bs2; jj+=bs) {
399         bap[jj] = *value++;
400       }
401     }
402   } else {
403     for (ii=0; ii<bs; ii++) {
404       for (jj=0; jj<bs; jj++) {
405         *bap++ = *value++;
406       }
407     }
408   }
409   noinsert2:;
410   ailen[row] = nrow;
411   PetscFunctionReturn(0);
412 }
413 
414 /*
415    This routine is exactly duplicated in mpibaij.c
416 */
417 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
418 {
419   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
420   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
421   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
422   PetscErrorCode    ierr;
423   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
424   PetscBool         roworiented=a->roworiented;
425   const PetscScalar *value     = v;
426   MatScalar         *ap,*aa = a->a,*bap;
427 
428   PetscFunctionBegin;
429   rp   = aj + ai[row];
430   ap   = aa + bs2*ai[row];
431   rmax = imax[row];
432   nrow = ailen[row];
433   low  = 0;
434   high = nrow;
435   value = v;
436   while (high-low > 7) {
437     t = (low+high)/2;
438     if (rp[t] > col) high = t;
439     else             low  = t;
440   }
441   for (i=low; i<high; i++) {
442     if (rp[i] > col) break;
443     if (rp[i] == col) {
444       bap = ap +  bs2*i;
445       if (roworiented) {
446         if (is == ADD_VALUES) {
447           for (ii=0; ii<bs; ii++) {
448             for (jj=ii; jj<bs2; jj+=bs) {
449               bap[jj] += *value++;
450             }
451           }
452         } else {
453           for (ii=0; ii<bs; ii++) {
454             for (jj=ii; jj<bs2; jj+=bs) {
455               bap[jj] = *value++;
456             }
457           }
458         }
459       } else {
460         if (is == ADD_VALUES) {
461           for (ii=0; ii<bs; ii++,value+=bs) {
462             for (jj=0; jj<bs; jj++) {
463               bap[jj] += value[jj];
464             }
465             bap += bs;
466           }
467         } else {
468           for (ii=0; ii<bs; ii++,value+=bs) {
469             for (jj=0; jj<bs; jj++) {
470               bap[jj]  = value[jj];
471             }
472             bap += bs;
473           }
474         }
475       }
476       goto noinsert2;
477     }
478   }
479   if (nonew == 1) goto noinsert2;
480   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
481   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
482   N = nrow++ - 1; high++;
483   /* shift up all the later entries in this row */
484   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
485   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
486   rp[i] = col;
487   bap   = ap +  bs2*i;
488   if (roworiented) {
489     for (ii=0; ii<bs; ii++) {
490       for (jj=ii; jj<bs2; jj+=bs) {
491         bap[jj] = *value++;
492       }
493     }
494   } else {
495     for (ii=0; ii<bs; ii++) {
496       for (jj=0; jj<bs; jj++) {
497         *bap++ = *value++;
498       }
499     }
500   }
501   noinsert2:;
502   ailen[row] = nrow;
503   PetscFunctionReturn(0);
504 }
505 
506 /*
507     This routine could be optimized by removing the need for the block copy below and passing stride information
508   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
509 */
510 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
511 {
512   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
513   const MatScalar *value;
514   MatScalar       *barray     =baij->barray;
515   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
516   PetscErrorCode  ierr;
517   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
518   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
519   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
520 
521   PetscFunctionBegin;
522   if (!barray) {
523     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
524     baij->barray = barray;
525   }
526 
527   if (roworiented) {
528     stepval = (n-1)*bs;
529   } else {
530     stepval = (m-1)*bs;
531   }
532   for (i=0; i<m; i++) {
533     if (im[i] < 0) continue;
534 #if defined(PETSC_USE_DEBUG)
535     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
536 #endif
537     if (im[i] >= rstart && im[i] < rend) {
538       row = im[i] - rstart;
539       for (j=0; j<n; j++) {
540         if (im[i] > in[j]) {
541           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
542           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
543         }
544         /* If n = 1 (row-oriented) or m = 1 (column-oriented), the input block is already contiguous and a copy is not required */
545         if ((roworiented) && (n == 1)) {
546           barray = (MatScalar*) v + i*bs2;
547         } else if ((!roworiented) && (m == 1)) {
548           barray = (MatScalar*) v + j*bs2;
549         } else { /* Here a copy is required */
550           if (roworiented) {
551             value = v + i*(stepval+bs)*bs + j*bs;
552           } else {
553             value = v + j*(stepval+bs)*bs + i*bs;
554           }
555           for (ii=0; ii<bs; ii++,value+=stepval) {
556             for (jj=0; jj<bs; jj++) {
557               *barray++ = *value++;
558             }
559           }
560           barray -=bs2;
561         }
562 
563         if (in[j] >= cstart && in[j] < cend) {
564           col  = in[j] - cstart;
565           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
566         } else if (in[j] < 0) continue;
567 #if defined(PETSC_USE_DEBUG)
568         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
569 #endif
570         else {
571           if (mat->was_assembled) {
572             if (!baij->colmap) {
573               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
574             }
575 
576 #if defined(PETSC_USE_DEBUG)
577 #if defined(PETSC_USE_CTABLE)
578             { PetscInt data;
579               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
580               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
581             }
582 #else
583             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
584 #endif
585 #endif
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col  = (col - 1)/bs;
589 #else
590             col = (baij->colmap[in[j]] - 1)/bs;
591 #endif
592             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
593               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
594               col  = in[j];
595             }
596           } else col = in[j];
597           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
598         }
599       }
600     } else {
601       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
602       if (!baij->donotstash) {
603         if (roworiented) {
604           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
605         } else {
606           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         }
608       }
609     }
610   }
611   PetscFunctionReturn(0);
612 }
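
/*
   Usage sketch (illustrative only): MatSetValuesBlocked() takes block
   indices and a dense bs x bs block of values, row-oriented by default.
   For bs = 2 and hypothetical block indices ib <= jb:

     PetscInt    ib = 0,jb = 1;
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};      the 2x2 block, row by row
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vals,INSERT_VALUES);CHKERRQ(ierr);
*/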
613 
614 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
615 {
616   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
617   PetscErrorCode ierr;
618   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
619   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
620 
621   PetscFunctionBegin;
622   for (i=0; i<m; i++) {
623     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
624     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
625     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
626       row = idxm[i] - bsrstart;
627       for (j=0; j<n; j++) {
628         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
629         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
630         if (idxn[j] >= bscstart && idxn[j] < bscend) {
631           col  = idxn[j] - bscstart;
632           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633         } else {
634           if (!baij->colmap) {
635             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
636           }
637 #if defined(PETSC_USE_CTABLE)
638           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
639           data--;
640 #else
641           data = baij->colmap[idxn[j]/bs]-1;
642 #endif
643           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
644           else {
645             col  = data + idxn[j]%bs;
646             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647           }
648         }
649       }
650     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
651   }
652   PetscFunctionReturn(0);
653 }
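
/*
   Usage sketch (illustrative only): as the error branch above indicates,
   MatGetValues() on this type only returns entries from locally owned
   rows, so callers restrict their queries to the ownership range:

     PetscInt    rstart,rend;
     PetscScalar v;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetValues(mat,1,&rstart,1,&rstart,&v);CHKERRQ(ierr);
*/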
654 
655 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
656 {
657   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
658   PetscErrorCode ierr;
659   PetscReal      sum[2],*lnorm2;
660 
661   PetscFunctionBegin;
662   if (baij->size == 1) {
663     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
664   } else {
665     if (type == NORM_FROBENIUS) {
666       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
667       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
668       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
669       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
670       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
671       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
672       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
673       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
674     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
675       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
676       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
677       PetscReal    *rsum,*rsum2,vabs;
678       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
679       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
680       MatScalar    *v;
681 
682       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
683       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
684       /* Amat */
685       v = amat->a; jj = amat->j;
686       for (brow=0; brow<mbs; brow++) {
687         grow = bs*(rstart + brow);
688         nz   = amat->i[brow+1] - amat->i[brow];
689         for (bcol=0; bcol<nz; bcol++) {
690           gcol = bs*(rstart + *jj); jj++;
691           for (col=0; col<bs; col++) {
692             for (row=0; row<bs; row++) {
693               vabs            = PetscAbsScalar(*v); v++;
694               rsum[gcol+col] += vabs;
695               /* non-diagonal block */
696               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
697             }
698           }
699         }
700         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
701       }
702       /* Bmat */
703       v = bmat->a; jj = bmat->j;
704       for (brow=0; brow<mbs; brow++) {
705         grow = bs*(rstart + brow);
706         nz = bmat->i[brow+1] - bmat->i[brow];
707         for (bcol=0; bcol<nz; bcol++) {
708           gcol = bs*garray[*jj]; jj++;
709           for (col=0; col<bs; col++) {
710             for (row=0; row<bs; row++) {
711               vabs            = PetscAbsScalar(*v); v++;
712               rsum[gcol+col] += vabs;
713               rsum[grow+row] += vabs;
714             }
715           }
716         }
717         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
718       }
719       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
720       *norm = 0.0;
721       for (col=0; col<mat->cmap->N; col++) {
722         if (rsum2[col] > *norm) *norm = rsum2[col];
723       }
724       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
725     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
726   }
727   PetscFunctionReturn(0);
728 }
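
/*
   Note on the Frobenius branch above: only the upper triangle is stored,
   and each off-diagonal block of B appears once, so
   ||mat||_F^2 = ||A||_F^2 + 2*||B||_F^2, which is exactly the
   sum[0] + 2*sum[1] combination after the reduction.  A minimal call:

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/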
729 
730 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
731 {
732   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
733   PetscErrorCode ierr;
734   PetscInt       nstash,reallocs;
735 
736   PetscFunctionBegin;
737   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
738 
739   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
740   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
741   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
742   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
743   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
744   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
749 {
750   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
751   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
752   PetscErrorCode ierr;
753   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
754   PetscInt       *row,*col;
755   PetscBool      other_disassembled;
756   PetscMPIInt    n;
757   PetscBool      r1,r2,r3;
758   MatScalar      *val;
759 
760   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
761   PetscFunctionBegin;
762   if (!baij->donotstash &&  !mat->nooffprocentries) {
763     while (1) {
764       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
765       if (!flg) break;
766 
767       for (i=0; i<n;) {
768         /* Now identify the consecutive vals belonging to the same row */
769         for (j=i,rstart=row[j]; j<n; j++) {
770           if (row[j] != rstart) break;
771         }
772         if (j < n) ncols = j-i;
773         else       ncols = n-i;
774         /* Now assemble all these values with a single function call */
775         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
776         i    = j;
777       }
778     }
779     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
780     /* Now process the block-stash. Since the values are stashed column-oriented,
781        set the roworiented flags to PETSC_FALSE (column-oriented), and after MatSetValues()
782        restore the original flags */
783     r1 = baij->roworiented;
784     r2 = a->roworiented;
785     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
786 
787     baij->roworiented = PETSC_FALSE;
788     a->roworiented    = PETSC_FALSE;
789 
790     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
791     while (1) {
792       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
793       if (!flg) break;
794 
795       for (i=0; i<n;) {
796         /* Now identify the consecutive vals belonging to the same row */
797         for (j=i,rstart=row[j]; j<n; j++) {
798           if (row[j] != rstart) break;
799         }
800         if (j < n) ncols = j-i;
801         else       ncols = n-i;
802         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
803         i    = j;
804       }
805     }
806     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
807 
808     baij->roworiented = r1;
809     a->roworiented    = r2;
810 
811     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
812   }
813 
814   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled; if so, we must
818      also disassemble ourselves, in order that we may reassemble. */
819   /*
820      if the nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled, so we can skip this step
822   */
823   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829 
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
831     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
832   }
833   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
837 
838   baij->rowvalues = NULL;
839 
840   /* if no new nonzero locations are allowed in the matrix then only set the nonzero state the first time through */
841   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
842     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
843     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
844   }
845   PetscFunctionReturn(0);
846 }
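
/*
   Usage sketch (illustrative only): the stash machinery above is what
   makes the standard assembly idiom work with off-process entries,

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);    possibly rows owned elsewhere
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() ships stashed entries to their owning processes and
   this routine receives and inserts them before assembling A and B.
*/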
847 
848 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
849 #include <petscdraw.h>
850 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
851 {
852   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
853   PetscErrorCode    ierr;
854   PetscInt          bs   = mat->rmap->bs;
855   PetscMPIInt       rank = baij->rank;
856   PetscBool         iascii,isdraw;
857   PetscViewer       sviewer;
858   PetscViewerFormat format;
859 
860   PetscFunctionBegin;
861   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
862   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
863   if (iascii) {
864     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
865     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
866       MatInfo info;
867       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
868       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
869       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
870       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
871       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
872       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
873       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
875       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
876       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
877       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
878       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == PETSC_VIEWER_ASCII_INFO) {
881       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
882       PetscFunctionReturn(0);
883     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
884       PetscFunctionReturn(0);
885     }
886   }
887 
888   if (isdraw) {
889     PetscDraw draw;
890     PetscBool isnull;
891     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
892     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
893     if (isnull) PetscFunctionReturn(0);
894   }
895 
896   {
897     /* assemble the entire matrix onto the first processor */
898     Mat          A;
899     Mat_SeqSBAIJ *Aloc;
900     Mat_SeqBAIJ  *Bloc;
901     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
902     MatScalar    *a;
903     const char   *matname;
904 
905     /* Should this be the same type as mat? */
906     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
907     if (!rank) {
908       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
909     } else {
910       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
911     }
912     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
913     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
914     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
915     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
916 
917     /* copy over the A part */
918     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
919     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
920     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
921 
922     for (i=0; i<mbs; i++) {
923       rvals[0] = bs*(baij->rstartbs + i);
924       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
925       for (j=ai[i]; j<ai[i+1]; j++) {
926         col = (baij->cstartbs+aj[j])*bs;
927         for (k=0; k<bs; k++) {
928           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
929           col++;
930           a += bs;
931         }
932       }
933     }
934     /* copy over the B part */
935     Bloc = (Mat_SeqBAIJ*)baij->B->data;
936     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
937     for (i=0; i<mbs; i++) {
938 
939       rvals[0] = bs*(baij->rstartbs + i);
940       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
941       for (j=ai[i]; j<ai[i+1]; j++) {
942         col = baij->garray[aj[j]]*bs;
943         for (k=0; k<bs; k++) {
944           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
945           col++;
946           a += bs;
947         }
948       }
949     }
950     ierr = PetscFree(rvals);CHKERRQ(ierr);
951     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
952     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     /*
954        Everyone has to participate in drawing the matrix since the graphics waits are
955        synchronized across all processors that share the PetscDraw object
956     */
957     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
958     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
961       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
964     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
965     ierr = MatDestroy(&A);CHKERRQ(ierr);
966   }
967   PetscFunctionReturn(0);
968 }
969 
970 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
971 {
972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
973   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
974   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
975   PetscErrorCode ierr;
976   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
977   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
978   int            fd;
979   PetscScalar    *column_values;
980   FILE           *file;
981   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
982   PetscInt       message_count,flowcontrolcount;
983 
984   PetscFunctionBegin;
985   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
986   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
987   nz   = bs2*(A->nz + B->nz);
988   rlen = mat->rmap->n;
989   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
990   if (!rank) {
991     header[0] = MAT_FILE_CLASSID;
992     header[1] = mat->rmap->N;
993     header[2] = mat->cmap->N;
994 
995     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
996     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     /* get largest number of rows any processor has */
998     range = mat->rmap->range;
999     for (i=1; i<size; i++) {
1000       rlen = PetscMax(rlen,range[i+1] - range[i]);
1001     }
1002   } else {
1003     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1004   }
1005 
1006   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1007   /* compute lengths of each row  */
1008   for (i=0; i<a->mbs; i++) {
1009     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1010   }
1011   /* store the row lengths to the file */
1012   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1013   if (!rank) {
1014     MPI_Status status;
1015     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1016     rlen = (range[1] - range[0])/bs;
1017     for (i=0; i<rlen; i++) {
1018       for (j=0; j<bs; j++) {
1019         row_lens[i*bs+j] = bs*crow_lens[i];
1020       }
1021     }
1022     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       rlen = (range[i+1] - range[i])/bs;
1025       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1026       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1027       for (k=0; k<rlen; k++) {
1028         for (j=0; j<bs; j++) {
1029           row_lens[k*bs+j] = bs*crow_lens[k];
1030         }
1031       }
1032       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1033     }
1034     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1035     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1036   } else {
1037     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1038     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1039     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1040   }
1041   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1042 
1043   /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
1044      information needed to expand a block row into its individual rows. This requires more communication, but still no more than
1045      the communication needed for the nonzero values */
1046   nzmax = nz; /* space the largest processor needs */
1047   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1048   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1049   cnt   = 0;
1050   for (i=0; i<a->mbs; i++) {
1051     pcnt = cnt;
1052     for (j=B->i[i]; j<B->i[i+1]; j++) {
1053       if ((col = garray[B->j[j]]) > cstart) break;
1054       for (l=0; l<bs; l++) {
1055         column_indices[cnt++] = bs*col+l;
1056       }
1057     }
1058     for (k=A->i[i]; k<A->i[i+1]; k++) {
1059       for (l=0; l<bs; l++) {
1060         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1061       }
1062     }
1063     for (; j<B->i[i+1]; j++) {
1064       for (l=0; l<bs; l++) {
1065         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1066       }
1067     }
1068     len = cnt - pcnt;
1069     for (k=1; k<bs; k++) {
1070       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1071       cnt += len;
1072     }
1073   }
1074   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1075 
1076   /* store the columns to the file */
1077   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1078   if (!rank) {
1079     MPI_Status status;
1080     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1081     for (i=1; i<size; i++) {
1082       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1083       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1084       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1085       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1086     }
1087     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1088   } else {
1089     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1090     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1091     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1092     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1095 
1096   /* load up the numerical values */
1097   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1098   cnt  = 0;
1099   for (i=0; i<a->mbs; i++) {
1100     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1101     for (j=B->i[i]; j<B->i[i+1]; j++) {
1102       if (garray[B->j[j]] > cstart) break;
1103       for (l=0; l<bs; l++) {
1104         for (ll=0; ll<bs; ll++) {
1105           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1106         }
1107       }
1108       cnt += bs;
1109     }
1110     for (k=A->i[i]; k<A->i[i+1]; k++) {
1111       for (l=0; l<bs; l++) {
1112         for (ll=0; ll<bs; ll++) {
1113           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1114         }
1115       }
1116       cnt += bs;
1117     }
1118     for (; j<B->i[i+1]; j++) {
1119       for (l=0; l<bs; l++) {
1120         for (ll=0; ll<bs; ll++) {
1121           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1122         }
1123       }
1124       cnt += bs;
1125     }
1126     cnt += (bs-1)*rlen;
1127   }
1128   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1129 
1130   /* store the column values to the file */
1131   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1132   if (!rank) {
1133     MPI_Status status;
1134     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1135     for (i=1; i<size; i++) {
1136       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1137       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1138       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1139       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1140     }
1141     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1142   } else {
1143     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1144     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1145     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1146     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1147   }
1148   ierr = PetscFree(column_values);CHKERRQ(ierr);
1149 
1150   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1151   if (file) {
1152     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1153   }
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1158 {
1159   PetscErrorCode ierr;
1160   PetscBool      iascii,isdraw,issocket,isbinary;
1161 
1162   PetscFunctionBegin;
1163   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1164   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1165   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1166   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1167   if (iascii || isdraw || issocket) {
1168     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1169   } else if (isbinary) {
1170     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
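
/*
   Usage sketch (illustrative only): the binary path above pairs with
   MatLoad(); e.g. to write a matrix to a (hypothetical) file mat.dat:

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(comm,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/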
1174 
1175 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1176 {
1177   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181 #if defined(PETSC_USE_LOG)
1182   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1183 #endif
1184   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1185   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1186   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1187   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1188 #if defined(PETSC_USE_CTABLE)
1189   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1190 #else
1191   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1192 #endif
1193   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1194   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1195   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1196   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1197   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1198   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1199   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1200   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1201   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1202   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1203   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1204   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1205   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1206   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1207   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1208 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1209   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1210 #endif
1211   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1212   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1213   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1214   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1215 
1216   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1221 #if defined(PETSC_HAVE_ELEMENTAL)
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1223 #endif
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1230 {
1231   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1232   PetscErrorCode    ierr;
1233   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1234   PetscScalar       *from;
1235   const PetscScalar *x;
1236 
1237   PetscFunctionBegin;
1238   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1239   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1240 
1241   /* diagonal part */
1242   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1243   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1244 
1245   /* subdiagonal part */
1246   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1247 
1248   /* copy x into the vec slvec0 */
1249   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1250   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1251 
1252   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1253   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1254   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1255 
1256   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1258   /* superdiagonal part */
1259   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1264 {
1265   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1266   PetscErrorCode    ierr;
1267   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1268   PetscScalar       *from;
1269   const PetscScalar *x;
1270 
1271   PetscFunctionBegin;
1272   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1273   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1274 
1275   /* diagonal part */
1276   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1277   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1278 
1279   /* subdiagonal part */
1280   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1281 
1282   /* copy x into the vec slvec0 */
1283   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1284   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1285 
1286   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1287   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1288   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1289 
1290   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1291   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1292   /* superdiagonal part */
1293   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1298 {
1299   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1300   PetscErrorCode ierr;
1301   PetscInt       nt;
1302 
1303   PetscFunctionBegin;
1304   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1305   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1306 
1307   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1308   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1309 
1310   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1311   /* do diagonal part */
1312   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1313   /* do superdiagonal part */
1314   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1315   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1316   /* do subdiagonal part */
1317   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1318   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1324 {
1325   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1326   PetscErrorCode    ierr;
1327   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1328   PetscScalar       *from,zero=0.0;
1329   const PetscScalar *x;
1330 
1331   PetscFunctionBegin;
1332   /*
1333   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1334   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1335   */
1336   /* diagonal part */
1337   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1338   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1339 
1340   /* subdiagonal part */
1341   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1342 
1343   /* copy x into the vec slvec0 */
1344   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1345   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1346   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1347   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1348 
1349   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1351   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352 
1353   /* superdiagonal part */
1354   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1359 {
1360   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1365   /* do diagonal part */
1366   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1367   /* do superdiagonal part */
1368   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1369   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1370 
1371   /* do subdiagonal part */
1372   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1373   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1374   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 /*
1379   This only works correctly for square matrices where the subblock A->A is the
1380    diagonal block
1381 */
1382 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1383 {
1384   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1385   PetscErrorCode ierr;
1386 
1387   PetscFunctionBegin;
1388   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1389   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1394 {
1395   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1396   PetscErrorCode ierr;
1397 
1398   PetscFunctionBegin;
1399   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1400   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1405 {
1406   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1407   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1408   PetscErrorCode ierr;
1409   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1410   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1411   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1412 
1413   PetscFunctionBegin;
1414   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1415   mat->getrowactive = PETSC_TRUE;
1416 
1417   if (!mat->rowvalues && (idx || v)) {
1418     /*
1419         allocate enough space to hold information from the longest row.
1420     */
1421     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1422     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1423     PetscInt     max = 1,mbs = mat->mbs,tmp;
1424     for (i=0; i<mbs; i++) {
1425       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1426       if (max < tmp) max = tmp;
1427     }
1428     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1429   }
1430 
1431   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1432   lrow = row - brstart;  /* local row index */
1433 
1434   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1435   if (!v)   {pvA = 0; pvB = 0;}
1436   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1437   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1438   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1439   nztot = nzA + nzB;
1440 
1441   cmap = mat->garray;
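  /* merge the A (diagonal portion) and B (off-diagonal portion) rows into
     global column order: B entries whose global block column lies before the
     diagonal portion come first, then all A entries, then the rest of B */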
1442   if (v  || idx) {
1443     if (nztot) {
1444       /* Sort by increasing column numbers, assuming A and B already sorted */
1445       PetscInt imark = -1;
1446       if (v) {
1447         *v = v_p = mat->rowvalues;
1448         for (i=0; i<nzB; i++) {
1449           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1450           else break;
1451         }
1452         imark = i;
1453         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1454         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1455       }
1456       if (idx) {
1457         *idx = idx_p = mat->rowindices;
1458         if (imark > -1) {
1459           for (i=0; i<imark; i++) {
1460             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1461           }
1462         } else {
1463           for (i=0; i<nzB; i++) {
1464             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1465             else break;
1466           }
1467           imark = i;
1468         }
1469         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1470         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1471       }
1472     } else {
1473       if (idx) *idx = 0;
1474       if (v)   *v   = 0;
1475     }
1476   }
1477   *nz  = nztot;
1478   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1479   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1484 {
1485   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1486 
1487   PetscFunctionBegin;
1488   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1489   baij->getrowactive = PETSC_FALSE;
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1494 {
1495   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1496   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1497 
1498   PetscFunctionBegin;
1499   aA->getrow_utriangular = PETSC_TRUE;
1500   PetscFunctionReturn(0);
1501 }
1502 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1503 {
1504   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1505   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1506 
1507   PetscFunctionBegin;
1508   aA->getrow_utriangular = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1513 {
1514   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1515   PetscErrorCode ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1519   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1524 {
1525   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1530   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1535    Input: isrow       - distributed (parallel),
1536           iscol_local - locally owned (seq)
1537 */
1538 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1539 {
1540   PetscErrorCode ierr;
1541   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1542   const PetscInt *ptr1,*ptr2;
1543 
1544   PetscFunctionBegin;
1545   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1546   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1547   if (sz1 > sz2) {
1548     *flg = PETSC_FALSE;
1549     PetscFunctionReturn(0);
1550   }
1551 
1552   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1553   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1554 
1555   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1556   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1557   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1558   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1559   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1560   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1561 
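  /* both copies are sorted, so one forward sweep suffices: resume the scan of
     a2 at the position of the previous match */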
1562   nmatch=0;
1563   k     = 0;
1564   for (i=0; i<sz1; i++){
1565     for (j=k; j<sz2; j++){
1566       if (a1[i] == a2[j]) {
1567         k = j; nmatch++;
1568         break;
1569       }
1570     }
1571   }
1572   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1573   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1574   ierr = PetscFree(a1);CHKERRQ(ierr);
1575   ierr = PetscFree(a2);CHKERRQ(ierr);
1576   if (nmatch < sz1) {
1577     *flg = PETSC_FALSE;
1578   } else {
1579     *flg = PETSC_TRUE;
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1585 {
1586   PetscErrorCode ierr;
1587   IS             iscol_local;
1588   PetscInt       csize;
1589   PetscBool      isequal;
1590 
1591   PetscFunctionBegin;
1592   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1593   if (call == MAT_REUSE_MATRIX) {
1594     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1595     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1596   } else {
1597     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1598     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1599     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1600   }
1601 
1602   /* now call MatCreateSubMatrix_MPIBAIJ() */
1603   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1604   if (call == MAT_INITIAL_MATRIX) {
1605     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1606     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1612 {
1613   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1618   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1623 {
1624   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1625   Mat            A  = a->A,B = a->B;
1626   PetscErrorCode ierr;
1627   PetscReal      isend[5],irecv[5];
1628 
1629   PetscFunctionBegin;
1630   info->block_size = (PetscReal)matin->rmap->bs;
1631 
1632   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1633 
1634   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1635   isend[3] = info->memory;  isend[4] = info->mallocs;
1636 
1637   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1638 
1639   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1640   isend[3] += info->memory;  isend[4] += info->mallocs;
1641   if (flag == MAT_LOCAL) {
1642     info->nz_used      = isend[0];
1643     info->nz_allocated = isend[1];
1644     info->nz_unneeded  = isend[2];
1645     info->memory       = isend[3];
1646     info->mallocs      = isend[4];
1647   } else if (flag == MAT_GLOBAL_MAX) {
1648     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   } else if (flag == MAT_GLOBAL_SUM) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1664   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1665   info->fill_ratio_needed = 0;
1666   info->factor_mallocs    = 0;
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1671 {
1672   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1673   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   switch (op) {
1678   case MAT_NEW_NONZERO_LOCATIONS:
1679   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1680   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1681   case MAT_KEEP_NONZERO_PATTERN:
1682   case MAT_SUBMAT_SINGLEIS:
1683   case MAT_NEW_NONZERO_LOCATION_ERR:
1684     MatCheckPreallocated(A,1);
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1687     break;
1688   case MAT_ROW_ORIENTED:
1689     MatCheckPreallocated(A,1);
1690     a->roworiented = flg;
1691 
1692     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1693     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1694     break;
1695   case MAT_NEW_DIAGONALS:
1696   case MAT_SORTED_FULL:
1697     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1698     break;
1699   case MAT_IGNORE_OFF_PROC_ENTRIES:
1700     a->donotstash = flg;
1701     break;
1702   case MAT_USE_HASH_TABLE:
1703     a->ht_flag = flg;
1704     break;
1705   case MAT_HERMITIAN:
1706     MatCheckPreallocated(A,1);
1707     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1708     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1709 #if defined(PETSC_USE_COMPLEX)
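    /* with complex scalars the off-diagonal part must be applied as B^H rather
       than B^T, so switch to the Hermitian MatMult kernel */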
1710     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1711 #endif
1712     break;
1713   case MAT_SPD:
1714     A->spd_set = PETSC_TRUE;
1715     A->spd     = flg;
1716     if (flg) {
1717       A->symmetric                  = PETSC_TRUE;
1718       A->structurally_symmetric     = PETSC_TRUE;
1719       A->symmetric_set              = PETSC_TRUE;
1720       A->structurally_symmetric_set = PETSC_TRUE;
1721     }
1722     break;
1723   case MAT_SYMMETRIC:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_STRUCTURALLY_SYMMETRIC:
1728     MatCheckPreallocated(A,1);
1729     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_SYMMETRY_ETERNAL:
1732     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1733     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1734     break;
1735   case MAT_IGNORE_LOWER_TRIANGULAR:
1736     aA->ignore_ltriangular = flg;
1737     break;
1738   case MAT_ERROR_LOWER_TRIANGULAR:
1739     aA->ignore_ltriangular = flg;
1740     break;
1741   case MAT_GETROW_UPPERTRIANGULAR:
1742     aA->getrow_utriangular = flg;
1743     break;
1744   default:
1745     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %d",op);
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1751 {
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
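  /* an SBAIJ matrix is symmetric by construction, so its transpose is itself;
     a duplicate or copy suffices */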
1755   if (reuse == MAT_INITIAL_MATRIX) {
1756     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1757   }  else if (reuse == MAT_REUSE_MATRIX) {
1758     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1764 {
1765   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1766   Mat            a     = baij->A, b=baij->B;
1767   PetscErrorCode ierr;
1768   PetscInt       nv,m,n;
1769   PetscBool      flg;
1770 
1771   PetscFunctionBegin;
1772   if (ll != rr) {
1773     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1774     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1775   }
1776   if (!ll) PetscFunctionReturn(0);
1777 
1778   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1779   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1780 
1781   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1782   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1783 
1784   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1785 
1786   /* left diagonalscale the off-diagonal part */
1787   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1788 
1789   /* scale the diagonal part */
1790   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1791 
1792   /* right diagonalscale the off-diagonal part */
1793   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1794   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1799 {
1800   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1809 
1810 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1811 {
1812   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1813   Mat            a,b,c,d;
1814   PetscBool      flg;
1815   PetscErrorCode ierr;
1816 
1817   PetscFunctionBegin;
1818   a = matA->A; b = matA->B;
1819   c = matB->A; d = matB->B;
1820 
1821   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1822   if (flg) {
1823     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1824   }
1825   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1830 {
1831   PetscErrorCode ierr;
1832   PetscBool      isbaij;
1833 
1834   PetscFunctionBegin;
1835   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1836   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1837   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1838   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1839     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1840     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1841     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1842   } else {
1843     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1844     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1845 
1846     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1847     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1848   }
1849   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1854 {
1855   PetscErrorCode ierr;
1856 
1857   PetscFunctionBegin;
1858   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1863 {
1864   PetscErrorCode ierr;
1865   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1866   PetscBLASInt   bnz,one=1;
1867   Mat_SeqSBAIJ   *xa,*ya;
1868   Mat_SeqBAIJ    *xb,*yb;
1869 
1870   PetscFunctionBegin;
1871   if (str == SAME_NONZERO_PATTERN) {
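    /* identical nonzero patterns: the stored value arrays of X and Y line up
       entry for entry, so Y += a*X reduces to one BLAS axpy per local part */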
1872     PetscScalar alpha = a;
1873     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1874     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1875     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1876     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1877     xb   = (Mat_SeqBAIJ*)xx->B->data;
1878     yb   = (Mat_SeqBAIJ*)yy->B->data;
1879     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1880     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1881     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1882   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1883     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1884     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1885     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1886   } else {
1887     Mat      B;
1888     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1889     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1890     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1891     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1892     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1893     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1894     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1895     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1896     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1897     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1898     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1899     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1900     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1901     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1902     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1903     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1904     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1905     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1906     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1907     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1908   }
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1913 {
1914   PetscErrorCode ierr;
1915   PetscInt       i;
1916   PetscBool      flg;
1917 
1918   PetscFunctionBegin;
1919   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1920   for (i=0; i<n; i++) {
1921     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1922     if (!flg) {
1923       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1924     }
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1930 {
1931   PetscErrorCode ierr;
1932   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1933   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1934 
1935   PetscFunctionBegin;
1936   if (!Y->preallocated) {
1937     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1938   } else if (!aij->nz) {
1939     PetscInt nonew = aij->nonew;
1940     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1941     aij->nonew = nonew;
1942   }
1943   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1948 {
1949   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1954   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1955   if (d) {
1956     PetscInt rstart;
1957     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1958     *d += rstart/A->rmap->bs;
1959 
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1965 {
1966   PetscFunctionBegin;
1967   *a = ((Mat_MPISBAIJ*)A->data)->A;
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 /* -------------------------------------------------------------------*/
1972 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1973                                        MatGetRow_MPISBAIJ,
1974                                        MatRestoreRow_MPISBAIJ,
1975                                        MatMult_MPISBAIJ,
1976                                /*  4*/ MatMultAdd_MPISBAIJ,
1977                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1978                                        MatMultAdd_MPISBAIJ,
1979                                        0,
1980                                        0,
1981                                        0,
1982                                /* 10*/ 0,
1983                                        0,
1984                                        0,
1985                                        MatSOR_MPISBAIJ,
1986                                        MatTranspose_MPISBAIJ,
1987                                /* 15*/ MatGetInfo_MPISBAIJ,
1988                                        MatEqual_MPISBAIJ,
1989                                        MatGetDiagonal_MPISBAIJ,
1990                                        MatDiagonalScale_MPISBAIJ,
1991                                        MatNorm_MPISBAIJ,
1992                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1993                                        MatAssemblyEnd_MPISBAIJ,
1994                                        MatSetOption_MPISBAIJ,
1995                                        MatZeroEntries_MPISBAIJ,
1996                                /* 24*/ 0,
1997                                        0,
1998                                        0,
1999                                        0,
2000                                        0,
2001                                /* 29*/ MatSetUp_MPISBAIJ,
2002                                        0,
2003                                        0,
2004                                        MatGetDiagonalBlock_MPISBAIJ,
2005                                        0,
2006                                /* 34*/ MatDuplicate_MPISBAIJ,
2007                                        0,
2008                                        0,
2009                                        0,
2010                                        0,
2011                                /* 39*/ MatAXPY_MPISBAIJ,
2012                                        MatCreateSubMatrices_MPISBAIJ,
2013                                        MatIncreaseOverlap_MPISBAIJ,
2014                                        MatGetValues_MPISBAIJ,
2015                                        MatCopy_MPISBAIJ,
2016                                /* 44*/ 0,
2017                                        MatScale_MPISBAIJ,
2018                                        MatShift_MPISBAIJ,
2019                                        0,
2020                                        0,
2021                                /* 49*/ 0,
2022                                        0,
2023                                        0,
2024                                        0,
2025                                        0,
2026                                /* 54*/ 0,
2027                                        0,
2028                                        MatSetUnfactored_MPISBAIJ,
2029                                        0,
2030                                        MatSetValuesBlocked_MPISBAIJ,
2031                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2032                                        0,
2033                                        0,
2034                                        0,
2035                                        0,
2036                                /* 64*/ 0,
2037                                        0,
2038                                        0,
2039                                        0,
2040                                        0,
2041                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2042                                        0,
2043                                        0,
2044                                        0,
2045                                        0,
2046                                /* 74*/ 0,
2047                                        0,
2048                                        0,
2049                                        0,
2050                                        0,
2051                                /* 79*/ 0,
2052                                        0,
2053                                        0,
2054                                        0,
2055                                        MatLoad_MPISBAIJ,
2056                                /* 84*/ 0,
2057                                        0,
2058                                        0,
2059                                        0,
2060                                        0,
2061                                /* 89*/ 0,
2062                                        0,
2063                                        0,
2064                                        0,
2065                                        0,
2066                                /* 94*/ 0,
2067                                        0,
2068                                        0,
2069                                        0,
2070                                        0,
2071                                /* 99*/ 0,
2072                                        0,
2073                                        0,
2074                                        0,
2075                                        0,
2076                                /*104*/ 0,
2077                                        MatRealPart_MPISBAIJ,
2078                                        MatImaginaryPart_MPISBAIJ,
2079                                        MatGetRowUpperTriangular_MPISBAIJ,
2080                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2081                                /*109*/ 0,
2082                                        0,
2083                                        0,
2084                                        0,
2085                                        MatMissingDiagonal_MPISBAIJ,
2086                                /*114*/ 0,
2087                                        0,
2088                                        0,
2089                                        0,
2090                                        0,
2091                                /*119*/ 0,
2092                                        0,
2093                                        0,
2094                                        0,
2095                                        0,
2096                                /*124*/ 0,
2097                                        0,
2098                                        0,
2099                                        0,
2100                                        0,
2101                                /*129*/ 0,
2102                                        0,
2103                                        0,
2104                                        0,
2105                                        0,
2106                                /*134*/ 0,
2107                                        0,
2108                                        0,
2109                                        0,
2110                                        0,
2111                                /*139*/ MatSetBlockSizes_Default,
2112                                        0,
2113                                        0,
2114                                        0,
2115                                        0,
2116                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2117 };
2118 
2119 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2120 {
2121   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
2122   PetscErrorCode ierr;
2123   PetscInt       i,mbs,Mbs;
2124   PetscMPIInt    size;
2125 
2126   PetscFunctionBegin;
2127   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2128   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2129   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2130   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2131   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
2132   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
2133 
2134   mbs = B->rmap->n/bs;
2135   Mbs = B->rmap->N/bs;
2136   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2137 
2138   B->rmap->bs = bs;
2139   b->bs2      = bs*bs;
2140   b->mbs      = mbs;
2141   b->Mbs      = Mbs;
2142   b->nbs      = B->cmap->n/bs;
2143   b->Nbs      = B->cmap->N/bs;
2144 
2145   for (i=0; i<=b->size; i++) {
2146     b->rangebs[i] = B->rmap->range[i]/bs;
2147   }
2148   b->rstartbs = B->rmap->rstart/bs;
2149   b->rendbs   = B->rmap->rend/bs;
2150 
2151   b->cstartbs = B->cmap->rstart/bs;
2152   b->cendbs   = B->cmap->rend/bs;
2153 
2154 #if defined(PETSC_USE_CTABLE)
2155   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2156 #else
2157   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2158 #endif
2159   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2160   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2161   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2162   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2163   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2164   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2165   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2166   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2167   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2168 
2169   /* Because b->B may have been resized, we simply destroy it and create a new one each time */
2170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2171   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2172   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2173   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2174   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2175   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2176 
2177   if (!B->preallocated) {
2178     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2179     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2180     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2181     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2182     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2183   }
2184 
2185   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2186   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2187 
2188   B->preallocated  = PETSC_TRUE;
2189   B->was_assembled = PETSC_FALSE;
2190   B->assembled     = PETSC_FALSE;
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2195 {
2196   PetscInt       m,rstart,cend;
2197   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2198   const PetscInt *JJ    =0;
2199   PetscScalar    *values=0;
2200   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2201   PetscErrorCode ierr;
2202   PetscBool      nooffprocentries;
2203 
2204   PetscFunctionBegin;
2205   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2206   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2207   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2208   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2209   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2210   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2211   m      = B->rmap->n/bs;
2212   rstart = B->rmap->rstart/bs;
2213   cend   = B->cmap->rend/bs;
2214 
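  /* first pass over the block CSR pattern: in each block row count and skip the
     strictly lower triangular blocks (bd), then split the remaining upper
     triangular blocks into the diagonal portion (d) and the off-diagonal
     portion (o); these counts drive the preallocation below */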
2215   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2216   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2217   for (i=0; i<m; i++) {
2218     nz = ii[i+1] - ii[i];
2219     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2220     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2221     JJ     = jj + ii[i];
2222     bd     = 0;
2223     for (j=0; j<nz; j++) {
2224       if (*JJ >= i + rstart) break;
2225       JJ++;
2226       bd++;
2227     }
2228     d  = 0;
2229     for (; j<nz; j++) {
2230       if (*JJ++ >= cend) break;
2231       d++;
2232     }
2233     d_nnz[i] = d;
2234     o_nnz[i] = nz - d - bd;
2235     nz       = nz - bd;
2236     nz_max = PetscMax(nz_max,nz);
2237   }
2238   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2239   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2240   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2241 
2242   values = (PetscScalar*)V;
2243   if (!values) {
2244     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2245   }
2246   for (i=0; i<m; i++) {
2247     PetscInt          row    = i + rstart;
2248     PetscInt          ncols  = ii[i+1] - ii[i];
2249     const PetscInt    *icols = jj + ii[i];
2250     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2251       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2252       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2253     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2254       PetscInt j;
2255       for (j=0; j<ncols; j++) {
2256         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2257         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2258       }
2259     }
2260   }
2261 
2262   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2263   nooffprocentries    = B->nooffprocentries;
2264   B->nooffprocentries = PETSC_TRUE;
2265   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2267   B->nooffprocentries = nooffprocentries;
2268 
2269   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 /*MC
2274    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2275    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2276    the matrix is stored.
2277 
2278    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2279    can call MatSetOption(Mat, MAT_HERMITIAN);
2280 
2281    Options Database Keys:
2282 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2283 
2284    Notes:
2285      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2286      diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
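
   A minimal creation sketch (comm, n, bs, d_nz, and o_nz below are placeholder
   values, not recommendations):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   /* set only the upper triangular entries, then MatAssemblyBegin/End() */
.ve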
2287 
2288    Level: beginner
2289 
2290 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType
2291 M*/
2292 
2293 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2294 {
2295   Mat_MPISBAIJ   *b;
2296   PetscErrorCode ierr;
2297   PetscBool      flg = PETSC_FALSE;
2298 
2299   PetscFunctionBegin;
2300   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2301   B->data = (void*)b;
2302   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2303 
2304   B->ops->destroy = MatDestroy_MPISBAIJ;
2305   B->ops->view    = MatView_MPISBAIJ;
2306   B->assembled    = PETSC_FALSE;
2307   B->insertmode   = NOT_SET_VALUES;
2308 
2309   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2310   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2311 
2312   /* build local table of row and column ownerships */
2313   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2314 
2315   /* build cache for off array entries formed */
2316   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2317 
2318   b->donotstash  = PETSC_FALSE;
2319   b->colmap      = NULL;
2320   b->garray      = NULL;
2321   b->roworiented = PETSC_TRUE;
2322 
2323   /* stuff used in block assembly */
2324   b->barray = 0;
2325 
2326   /* stuff used for matrix vector multiply */
2327   b->lvec    = 0;
2328   b->Mvctx   = 0;
2329   b->slvec0  = 0;
2330   b->slvec0b = 0;
2331   b->slvec1  = 0;
2332   b->slvec1a = 0;
2333   b->slvec1b = 0;
2334   b->sMvctx  = 0;
2335 
2336   /* stuff for MatGetRow() */
2337   b->rowindices   = 0;
2338   b->rowvalues    = 0;
2339   b->getrowactive = PETSC_FALSE;
2340 
2341   /* hash table stuff */
2342   b->ht           = 0;
2343   b->hd           = 0;
2344   b->ht_size      = 0;
2345   b->ht_flag      = PETSC_FALSE;
2346   b->ht_fact      = 0;
2347   b->ht_total_ct  = 0;
2348   b->ht_insert_ct = 0;
2349 
2350   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2351   b->ijonly = PETSC_FALSE;
2352 
2353   b->in_loc = 0;
2354   b->v_loc  = 0;
2355   b->n_loc  = 0;
2356 
2357   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2358   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2359   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2360   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2361 #if defined(PETSC_HAVE_ELEMENTAL)
2362   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2363 #endif
2364   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2365   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2366 
2367   B->symmetric                  = PETSC_TRUE;
2368   B->structurally_symmetric     = PETSC_TRUE;
2369   B->symmetric_set              = PETSC_TRUE;
2370   B->structurally_symmetric_set = PETSC_TRUE;
2371   B->symmetric_eternal          = PETSC_TRUE;
2372 
2373   B->hermitian                  = PETSC_FALSE;
2374   B->hermitian_set              = PETSC_FALSE;
2375 
2376   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2377   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2378   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2379   if (flg) {
2380     PetscReal fact = 1.39;
2381     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2382     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2383     if (fact <= 1.0) fact = 1.39;
2384     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2385     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2386   }
2387   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 /*MC
2392    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2393 
2394    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2395    and MATMPISBAIJ otherwise.
2396 
2397    Options Database Keys:
2398 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
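
   A short usage sketch (comm and n are placeholders):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
   MatSetType(A,MATSBAIJ);   /* becomes SEQSBAIJ on one process, MPISBAIJ otherwise */
.ve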
2399 
2400   Level: beginner
2401 
2402 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2403 M*/
2404 
2405 /*@C
2406    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2407    the user should preallocate the matrix storage by setting the parameters
2408    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2409    performance can be increased by more than a factor of 50.
2410 
2411    Collective on Mat
2412 
2413    Input Parameters:
2414 +  B - the matrix
2415 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2416           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2417 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2418            submatrix  (same for all local rows)
2419 .  d_nnz - array containing the number of block nonzeros in the various block rows
2420            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2421            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2422            for the diagonal entry and set a value even if it is zero.
2423 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2424            submatrix (same for all local rows).
2425 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2426            off-diagonal portion of the local submatrix that is right of the diagonal
2427            (possibly different for each block row) or NULL.
2428 
2429 
2430    Options Database Keys:
2431 +   -mat_no_unroll - uses code that does not unroll the loops in the
2432                      block calculations (much slower)
2433 -   -mat_block_size - size of the blocks to use
2434 
2435    Notes:
2436 
2437    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2438    then it must be used on all processors that share the object for that argument.
2439 
2440    If the *_nnz parameter is given then the *_nz parameter is ignored
2441 
2442    Storage Information:
2443    For a square global matrix we define each processor's diagonal portion
2444    to be its local rows and the corresponding columns (a square submatrix);
2445    each processor's off-diagonal portion encompasses the remainder of the
2446    local matrix (a rectangular submatrix).
2447 
2448    The user can specify preallocated storage for the diagonal part of
2449    the local submatrix with either d_nz or d_nnz (not both).  Set
2450    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2451    memory allocation.  Likewise, specify preallocated storage for the
2452    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2453 
2454    You can call MatGetInfo() to get information on how effective the preallocation was;
2455    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
2456    You can also run with the option -info and look for messages with the string
2457    malloc in them to see if additional memory allocation was needed.
2458 
2459    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2460    the figure below we depict these three local rows and all columns (0-11).
2461 
2462 .vb
2463            0 1 2 3 4 5 6 7 8 9 10 11
2464           --------------------------
2465    row 3  |. . . d d d o o o o  o  o
2466    row 4  |. . . d d d o o o o  o  o
2467    row 5  |. . . d d d o o o o  o  o
2468           --------------------------
2469 .ve
2470 
2471    Thus, any entries in the d locations are stored in the d (diagonal)
2472    submatrix, and any entries in the o locations are stored in the
2473    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2474    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2475 
2476    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2477    plus the diagonal part of the d matrix,
2478    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2479 
2480    In general, for PDE problems in which most nonzeros are near the diagonal,
2481    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2482    or you will get TERRIBLE performance; see the users' manual chapter on
2483    matrices.
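
   A hedged sketch of per-block-row preallocation, assuming three local block
   rows (the counts are illustrative only; compute them from your own sparsity
   pattern):
.vb
   PetscInt d_nnz[3] = {2,1,1};  /* upper triangular + diagonal blocks per block row */
   PetscInt o_nnz[3] = {1,1,0};  /* off-diagonal blocks per block row */
   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve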
2484 
2485    Level: intermediate
2486 
2487 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2488 @*/
2489 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2490 {
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2495   PetscValidType(B,1);
2496   PetscValidLogicalCollectiveInt(B,bs,2);
2497   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 /*@C
2502    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2503    (block compressed row).  For good matrix assembly performance
2504    the user should preallocate the matrix storage by setting the parameters
2505    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2506    performance can be increased by more than a factor of 50.
2507 
2508    Collective
2509 
2510    Input Parameters:
2511 +  comm - MPI communicator
2512 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2513           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2514 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2515            This value should be the same as the local size used in creating the
2516            y vector for the matrix-vector product y = Ax.
2517 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2518            This value should be the same as the local size used in creating the
2519            x vector for the matrix-vector product y = Ax.
2520 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2521 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2522 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2523            submatrix  (same for all local rows)
2524 .  d_nnz - array containing the number of block nonzeros in the various block rows
2525            in the upper triangular portion of the diagonal portion of the local submatrix
2526            (possibly different for each block row) or NULL.
2527            If you plan to factor the matrix you must leave room for the diagonal entry and
2528            set its value even if it is zero.
2529 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2530            submatrix (same for all local rows).
2531 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2532            off-diagonal portion of the local submatrix (possibly different for
2533            each block row) or NULL.
2534 
2535    Output Parameter:
2536 .  A - the matrix
2537 
2538    Options Database Keys:
2539 +   -mat_no_unroll - uses code that does not unroll the loops in the
2540                      block calculations (much slower)
2541 .   -mat_block_size - size of the blocks to use
2542 -   -mat_mpi - use the parallel matrix data structures even on one processor
2543                (defaults to using SeqBAIJ format on one processor)
2544 
2545    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2546    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2547    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2548 
2549    Notes:
2550    The number of rows and columns must be divisible by blocksize.
2551    This matrix type does not support complex Hermitian operations.
2552 
2553    The user MUST specify either the local or global matrix dimensions
2554    (possibly both).
2555 
2556    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2557    then it must be used on all processors that share the object for that argument.
2558 
2559    If the *_nnz parameter is given then the *_nz parameter is ignored
2560 
2561    Storage Information:
2562    For a square global matrix we define each processor's diagonal portion
2563    to be its local rows and the corresponding columns (a square submatrix);
2564    each processor's off-diagonal portion encompasses the remainder of the
2565    local matrix (a rectangular submatrix).
2566 
2567    The user can specify preallocated storage for the diagonal part of
2568    the local submatrix with either d_nz or d_nnz (not both).  Set
2569    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2570    memory allocation.  Likewise, specify preallocated storage for the
2571    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2572 
2573    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2574    the figure below we depict these three local rows and all columns (0-11).
2575 
2576 .vb
2577            0 1 2 3 4 5 6 7 8 9 10 11
2578           --------------------------
2579    row 3  |. . . d d d o o o o  o  o
2580    row 4  |. . . d d d o o o o  o  o
2581    row 5  |. . . d d d o o o o  o  o
2582           --------------------------
2583 .ve
2584 
2585    Thus, any entries in the d locations are stored in the d (diagonal)
2586    submatrix, and any entries in the o locations are stored in the
2587    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2588    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2589 
2590    Now d_nz should indicate the number of block nonzeros per block row in the upper triangular
2591    part (including the diagonal) of the d matrix,
2592    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
2593    In general, for PDE problems in which most nonzeros are near the diagonal,
2594    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2595    or you will get TERRIBLE performance; see the users' manual chapter on
2596    matrices.
2597 
2598    Level: intermediate
2599 
2600 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2601 @*/
2603 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2604 {
2605   PetscErrorCode ierr;
2606   PetscMPIInt    size;
2607 
2608   PetscFunctionBegin;
2609   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2610   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2611   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2612   if (size > 1) {
2613     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2614     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2615   } else {
2616     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2617     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2618   }
2619   PetscFunctionReturn(0);
2620 }
2621 
2623 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2624 {
2625   Mat            mat;
2626   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2627   PetscErrorCode ierr;
2628   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2629   PetscScalar    *array;
2630 
2631   PetscFunctionBegin;
2632   *newmat = 0;
2633 
2634   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2635   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2636   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2637   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2638   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2639 
2640   mat->factortype   = matin->factortype;
2641   mat->preallocated = PETSC_TRUE;
2642   mat->assembled    = PETSC_TRUE;
2643   mat->insertmode   = NOT_SET_VALUES;
2644 
2645   a      = (Mat_MPISBAIJ*)mat->data;
2646   a->bs2 = oldmat->bs2;
2647   a->mbs = oldmat->mbs;
2648   a->nbs = oldmat->nbs;
2649   a->Mbs = oldmat->Mbs;
2650   a->Nbs = oldmat->Nbs;
2651 
2652   a->size         = oldmat->size;
2653   a->rank         = oldmat->rank;
2654   a->donotstash   = oldmat->donotstash;
2655   a->roworiented  = oldmat->roworiented;
2656   a->rowindices   = 0;
2657   a->rowvalues    = 0;
2658   a->getrowactive = PETSC_FALSE;
2659   a->barray       = 0;
2660   a->rstartbs     = oldmat->rstartbs;
2661   a->rendbs       = oldmat->rendbs;
2662   a->cstartbs     = oldmat->cstartbs;
2663   a->cendbs       = oldmat->cendbs;
2664 
2665   /* hash table stuff */
2666   a->ht           = 0;
2667   a->hd           = 0;
2668   a->ht_size      = 0;
2669   a->ht_flag      = oldmat->ht_flag;
2670   a->ht_fact      = oldmat->ht_fact;
2671   a->ht_total_ct  = 0;
2672   a->ht_insert_ct = 0;
2673 
2674   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2675   if (oldmat->colmap) {
2676 #if defined(PETSC_USE_CTABLE)
2677     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2678 #else
2679     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2680     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2681     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2682 #endif
2683   } else a->colmap = 0;
2684 
2685   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2686     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2687     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2688     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2689   } else a->garray = 0;
2690 
2691   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2692   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2693   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2694   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2695   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2696 
2697   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2698   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2699   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2700   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2701 
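  /* slvec1 stores the locally owned part in its first bs*mbs entries and the ghost part in the
     remainder; slvec1a/slvec1b (and slvec0b) are created below as sequential views into that
     shared storage rather than as separate allocations */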
2702   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2703   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2704   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2705   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2706   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2707   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2708   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2709   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2712   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2713   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2714   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2715 
2716   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not implemented yet; instead use the lazy trick of referencing the old scatter: */
2717   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2718   a->sMvctx = oldmat->sMvctx;
2719   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2720 
2721   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2722   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2723   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2724   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2725   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2726   *newmat = mat;
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2731 {
2732   PetscErrorCode ierr;
2733   PetscInt       i,nz,j,rstart,rend;
2734   PetscScalar    *vals,*buf;
2735   MPI_Comm       comm;
2736   MPI_Status     status;
2737   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2738   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2739   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2740   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2741   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2742   PetscInt       dcount,kmax,k,nzcount,tmp;
2743   int            fd;
2744   PetscBool      isbinary;
2745 
2746   PetscFunctionBegin;
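  /* Overview: rank 0 reads the header, row lengths, column indices and numerical values from the
     binary file and ships each process its slice; every process then counts its diagonal and
     off-diagonal block entries so that the matrix can be preallocated before the rows are inserted */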
2747   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2748   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2749 
2750   /* force binary viewer to load .info file if it has not yet done so */
2751   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2752   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2753   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2754   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2755   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2756   if (bs < 0) bs = 1;
2757 
2758   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2759   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2760   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2761   if (!rank) {
2762     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2763     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2764     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2765   }
2766 
2767   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2768   M    = header[1];
2769   N    = header[2];
2770 
2771   /* If global sizes are set, check if they are consistent with that given in the file */
2772   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2773   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2774 
2775   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2776 
2777   /*
2778      This code adds extra rows to make sure the number of rows is
2779      divisible by the blocksize
2780   */
2781   Mbs        = M/bs;
2782   extra_rows = bs - M + bs*(Mbs);
2783   if (extra_rows == bs) extra_rows = 0;
2784   else                  Mbs++;
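  /* e.g. M = 10 with bs = 4 gives Mbs = 2 and extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and
     two padding rows (given a unit diagonal below) are appended */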
2785   if (extra_rows && !rank) {
2786     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2787   }
2788 
2789   /* determine ownership of all rows */
2790   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2791     mbs = Mbs/size + ((Mbs % size) > rank);
2792     m   = mbs*bs;
2793   } else { /* User Set */
2794     m   = newmat->rmap->n;
2795     mbs = m/bs;
2796   }
2797   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2798   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2799   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2800   rowners[0] = 0;
2801   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2802   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2803   rstart = rowners[rank];
2804   rend   = rowners[rank+1];
2805 
2806   /* distribute row lengths to all processors */
2807   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2808   if (!rank) {
2809     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2810     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
2811     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2812     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2813     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2814     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2815     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2816   } else {
2817     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2818   }
2819 
2820   if (!rank) {   /* procs[0] */
2821     /* calculate the number of nonzeros on each processor */
2822     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2823     for (i=0; i<size; i++) {
2824       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2825         procsnz[i] += rowlengths[j];
2826       }
2827     }
2828     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2829 
2830     /* determine max buffer needed and allocate it */
2831     maxnz = 0;
2832     for (i=0; i<size; i++) {
2833       maxnz = PetscMax(maxnz,procsnz[i]);
2834     }
2835     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2836 
2837     /* read in my part of the matrix column indices  */
2838     nz     = procsnz[0];
2839     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2840     mycols = ibuf;
2841     if (size == 1) nz -= extra_rows;
2842     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2843     if (size == 1) {
2844       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2845     }
2846 
2847     /* read in everyone else's column indices (except the last process's) and ship them off */
2848     for (i=1; i<size-1; i++) {
2849       nz   = procsnz[i];
2850       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2851       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2852     }
2853     /* read in the last process's part */
2854     if (size != 1) {
2855       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2856       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2857       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2858       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2859     }
2860     ierr = PetscFree(cols);CHKERRQ(ierr);
2861   } else {  /* procs[i], i>0 */
2862     /* determine buffer space needed for message */
2863     nz = 0;
2864     for (i=0; i<m; i++) nz += locrowlens[i];
2865     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2866     mycols = ibuf;
2867     /* receive message of column indices*/
2868     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2869     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2870     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match the number expected; the file is likely corrupt");
2871   }
2872 
2873   /* loop over local block rows, counting diagonal and off-diagonal block entries */
2874   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2875   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2876   rowcount = 0;
2877   nzcount  = 0;
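  /* mask[] marks the block columns already seen in the current block row; masked1[]/masked2[]
     record which slots were set so that only those are cleared before the next block row */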
2878   for (i=0; i<mbs; i++) {
2879     dcount  = 0;
2880     odcount = 0;
2881     for (j=0; j<bs; j++) {
2882       kmax = locrowlens[rowcount];
2883       for (k=0; k<kmax; k++) {
2884         tmp = mycols[nzcount++]/bs; /* block col. index */
2885         if (!mask[tmp]) {
2886           mask[tmp] = 1;
2887           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2888           else masked1[dcount++] = tmp; /* entry in diag portion */
2889         }
2890       }
2891       rowcount++;
2892     }
2893 
2894     dlens[i]  = dcount;  /* d_nzz[i] */
2895     odlens[i] = odcount; /* o_nzz[i] */
2896 
2897     /* zero out the mask elements we set */
2898     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2899     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2900   }
2901   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2902   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2903   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2904 
2905   if (!rank) {
2906     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2907     /* read in my part of the matrix numerical values  */
2908     nz     = procsnz[0];
2909     vals   = buf;
2910     mycols = ibuf;
2911     if (size == 1) nz -= extra_rows;
2912     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2913     if (size == 1) {
2914       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2915     }
2916 
2917     /* insert into matrix */
2918     jj = rstart*bs;
2919     for (i=0; i<m; i++) {
2920       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2921       mycols += locrowlens[i];
2922       vals   += locrowlens[i];
2923       jj++;
2924     }
2925 
2926     /* read in the other processes' values (except the last one's) and ship them out */
2927     for (i=1; i<size-1; i++) {
2928       nz   = procsnz[i];
2929       vals = buf;
2930       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2931       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2932     }
2933     /* the last proc */
2934     if (size != 1) {
2935       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2936       vals = buf;
2937       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2938       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2939       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2940     }
2941     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2942 
2943   } else {
2944     /* receive numeric values */
2945     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2946 
2947     /* receive message of values*/
2948     vals   = buf;
2949     mycols = ibuf;
2950     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2951     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2952     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the number expected; the file is likely corrupt");
2953 
2954     /* insert into matrix */
2955     jj = rstart*bs;
2956     for (i=0; i<m; i++) {
2957       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2958       mycols += locrowlens[i];
2959       vals   += locrowlens[i];
2960       jj++;
2961     }
2962   }
2963 
2964   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2965   ierr = PetscFree(buf);CHKERRQ(ierr);
2966   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2967   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2968   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2969   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2970   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 /*XXXXX@
2976    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2977 
2978    Not Collective on Mat; each process can have a different hash factor
2979 
2980    Input Parameters:
2981 +  mat  - the matrix
2982 -  fact - factor
2983 
2984    Level: advanced
2985 
2986   Notes:
2987    This can also be set by the command line option: -mat_use_hash_table fact
2988 
2989 .seealso: MatSetOption()
2990 @XXXXX*/
2991 
2993 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2994 {
2995   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2996   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2997   PetscReal      atmp;
2998   PetscReal      *work,*svalues,*rvalues;
2999   PetscErrorCode ierr;
3000   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
3001   PetscMPIInt    rank,size;
3002   PetscInt       *rowners_bs,dest,count,source;
3003   PetscScalar    *va;
3004   MatScalar      *ba;
3005   MPI_Status     stat;
3006 
3007   PetscFunctionBegin;
3008   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3009   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
3010   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3011 
3012   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3013   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3014 
3015   bs  = A->rmap->bs;
3016   mbs = a->mbs;
3017   Mbs = a->Mbs;
3018   ba  = b->a;
3019   bi  = b->i;
3020   bj  = b->j;
3021 
3022   /* find ownerships */
3023   rowners_bs = A->rmap->range;
3024 
3025   /* each proc creates an array to be distributed */
3026   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3027 
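  /* Only the upper triangle is stored, so an entry of B contributes both to a locally owned row
     and to a column owned by a later rank; the column contributions are accumulated in work[] and
     sent forward, and each rank then folds in the segments received from earlier ranks */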
3028   /* row_max for B */
3029   if (rank != size-1) {
3030     for (i=0; i<mbs; i++) {
3031       ncols = bi[1] - bi[0]; bi++;
3032       brow  = bs*i;
3033       for (j=0; j<ncols; j++) {
3034         bcol = bs*(*bj);
3035         for (kcol=0; kcol<bs; kcol++) {
3036           col  = bcol + kcol;                /* local col index */
3037           col += rowners_bs[rank+1];      /* global col index */
3038           for (krow=0; krow<bs; krow++) {
3039             atmp = PetscAbsScalar(*ba); ba++;
3040             row  = brow + krow;   /* local row index */
3041             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3042             if (work[col] < atmp) work[col] = atmp;
3043           }
3044         }
3045         bj++;
3046       }
3047     }
3048 
3049     /* send values to its owners */
3050     for (dest=rank+1; dest<size; dest++) {
3051       svalues = work + rowners_bs[dest];
3052       count   = rowners_bs[dest+1]-rowners_bs[dest];
3053       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3054     }
3055   }
3056 
3057   /* receive values */
3058   if (rank) {
3059     rvalues = work;
3060     count   = rowners_bs[rank+1]-rowners_bs[rank];
3061     for (source=0; source<rank; source++) {
3062       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3063       /* process values */
3064       for (i=0; i<count; i++) {
3065         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3066       }
3067     }
3068   }
3069 
3070   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3071   ierr = PetscFree(work);CHKERRQ(ierr);
3072   PetscFunctionReturn(0);
3073 }
3074 
3075 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3076 {
3077   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3078   PetscErrorCode    ierr;
3079   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3080   PetscScalar       *x,*ptr,*from;
3081   Vec               bb1;
3082   const PetscScalar *b;
3083 
3084   PetscFunctionBegin;
3085   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3086   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3087 
3088   if (flag == SOR_APPLY_UPPER) {
3089     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3090     PetscFunctionReturn(0);
3091   }
3092 
3093   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3094     if (flag & SOR_ZERO_INITIAL_GUESS) {
3095       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3096       its--;
3097     }
3098 
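    /* Each iteration applies the off-process couplings with a single scatter: slvec0 packs
       [x; B^T x] and is negated, the forward scatter adds those pieces into slvec1 = [bb; 0],
       and the multadd below completes the right-hand side for the purely local symmetric sweep */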
3099     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3100     while (its--) {
3101 
3102       /* lower triangular part: slvec0b = - B^T*xx */
3103       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3104 
3105       /* copy xx into slvec0a */
3106       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3107       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3108       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
3109       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3110 
3111       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3112 
3113       /* copy bb into slvec1a */
3114       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3115       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3116       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
3117       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3118 
3119       /* set slvec1b = 0 */
3120       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3121 
3122       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3123       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3124       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3125       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126 
3127       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus the off-process couplings applied to x */
3128       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3129 
3130       /* local diagonal sweep */
3131       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3132     }
3133     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3134   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3135     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3136   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3137     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3138   } else if (flag & SOR_EISENSTAT) {
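    /* Eisenstat's trick: apply the SSOR preconditioner with roughly half the work of two full
       triangular sweeps by combining a backward local sweep, a scaled diagonal residual, a single
       off-process multiply, and a forward local sweep */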
3139     Vec               xx1;
3140     PetscBool         hasop;
3141     const PetscScalar *diag;
3142     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3143     PetscInt          i,n;
3144 
3145     if (!mat->xx1) {
3146       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3147       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3148     }
3149     xx1 = mat->xx1;
3150     bb1 = mat->bb1;
3151 
3152     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3153 
3154     if (!mat->diag) {
3155       /* this is wrong for same matrix with new nonzero values */
3156       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3157       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3158     }
3159     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3160 
3161     if (hasop) {
3162       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* slvec1a = D x, so the VecAYPX() below matches the hand-coded branch */
3163       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3164     } else {
3165       /*
3166           These two lines are replaced by code that may be a bit faster for a good compiler
3167       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3168       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3169       */
3170       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3171       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3172       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3173       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3174       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3175       if (omega == 1.0) {
3176         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3177         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3178       } else {
3179         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3180         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3181       }
3182       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3183       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3184       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3185       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3186     }
3187 
3188     /* multiply off-diagonal portion of matrix */
3189     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3190     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3191     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3192     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3193     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
3194     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3195     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3196     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3197     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3198     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3199 
3200     /* local sweep */
3201     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3202     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3203   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3208 {
3209   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3210   PetscErrorCode ierr;
3211   Vec            lvec1,bb1;
3212 
3213   PetscFunctionBegin;
3214   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3215   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3216 
3217   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3218     if (flag & SOR_ZERO_INITIAL_GUESS) {
3219       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3220       its--;
3221     }
3222 
3223     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3224     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
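    /* Unlike MatSOR_MPISBAIJ(), this variant uses the plain Mvctx scatter twice per iteration
       (forward for the ghost values of x, reverse to return the B^T x contributions) instead of
       the packed slvec0/slvec1 scatter */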
3225     while (its--) {
3226       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3227 
3228       /* lower triangular part: bb1 = bb - B^T*xx */
3229       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3230       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3231 
3232       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3233       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3234       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3235 
3236       /* upper triangular part: bb1 = bb1 - B*x */
3237       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3238       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3239 
3240       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3241 
3242       /* diagonal sweep */
3243       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3244     }
3245     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3246     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3247   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 /*@
3252      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
3253          the local rows in standard CSR format.
3254 
3255    Collective
3256 
3257    Input Parameters:
3258 +  comm - MPI communicator
3259 .  bs - the block size, only a block size of 1 is supported
3260 .  m - number of local rows (Cannot be PETSC_DECIDE)
3261 .  n - This value should be the same as the local size used in creating the
3262        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3263        calculated if N is given) For square matrices n is almost always m.
3264 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3265 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3266 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
3267 .   j - column indices
3268 -   a - matrix values
3269 
3270    Output Parameter:
3271 .   mat - the matrix
3272 
3273    Level: intermediate
3274 
3275    Notes:
3276        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3277      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3278      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3279 
3280        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3281 
3282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3283           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3284 @*/
3285 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3286 {
3287   PetscErrorCode ierr;
3288 
3290   PetscFunctionBegin;
3291   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3292   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
3293   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3294   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3295   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3296   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3297   PetscFunctionReturn(0);
3298 }
3299 
3301 /*@C
3302    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
3303 
3304    Collective
3305 
3306    Input Parameters:
3307 +  B - the matrix
3308 .  bs - the block size
3309 .  i - the indices into j for the start of each local block row (starts with zero)
3310 .  j - the column indices for each local block row (starts with zero); these must be sorted within each row
3311 -  v - optional values in the matrix
3312 
3313    Level: advanced
3314 
3315    Notes:
3316    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
3317    and usually the numerical values as well
3318 
3319    Any entries below the diagonal are ignored
3320 
3321 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3322 @*/
3323 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3324 {
3325   PetscErrorCode ierr;
3326 
3327   PetscFunctionBegin;
3328   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3333 {
3334   PetscErrorCode ierr;
3335   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3336   PetscInt       *indx;
3337   PetscScalar    *values;
3338 
3339   PetscFunctionBegin;
3340   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
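  /* Two phases: for MAT_INITIAL_MATRIX the blocked nonzero pattern of inmat is counted per block
     row so that *outmat can be preallocated (symbolic); the values are then inserted row by row
     and the result assembled (numeric) */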
3341   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3342     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3343     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3344     PetscInt       *bindx,rmax=a->rmax,j;
3345     PetscMPIInt    rank,size;
3346 
3347     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3348     mbs = m/bs; Nbs = N/cbs;
3349     if (n == PETSC_DECIDE) {
3350       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3351     }
3352     nbs = n/cbs;
3353 
3354     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3355     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3356 
3357     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3358     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3359     if (rank == size-1) {
3360       /* Check sum(nbs) = Nbs */
3361       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3362     }
3363 
3364     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3365     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3366     for (i=0; i<mbs; i++) {
3367       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3368       nnz  = nnz/bs;
3369       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3370       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3371       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3372     }
3373     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3374     ierr = PetscFree(bindx);CHKERRQ(ierr);
3375 
3376     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3377     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3378     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3379     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3380     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3381     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3382     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3383   }
3384 
3385   /* numeric phase */
3386   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3387   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3388 
3389   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3390   for (i=0; i<m; i++) {
3391     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3392     Ii   = i + rstart;
3393     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3394     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3395   }
3396   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3397   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3398   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3399   PetscFunctionReturn(0);
3400 }
3401