xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d9fd370bfa47df0ebe64b20b27d5bd5e1f21abb9)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
25   PetscValidLogicalCollectiveInt(B,nm,2);
26   for (i = 0; i < nm; i++) {
27     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
28     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
29     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
30   }
31   PetscValidLogicalCollectiveBool(B,fill,5);
32   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
33   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
34   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
35   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
36   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
37   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
38   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
39   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
40   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
41   for (r = rstart; r < rend; ++r) {
42     PetscInt          ncols;
43     const PetscInt    *row;
44     const PetscScalar *vals;
45 
46     for (i = 0; i < nm; i++) {
47       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
48       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
49       if (symm && symm[i]) {
50         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
51       }
52       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
53     }
54   }
55   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
56   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
57   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
58   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
62 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
63 {
64   Mat            B;
65   PetscErrorCode ierr;
66   PetscInt       r;
67 
68   PetscFunctionBegin;
69   if (reuse != MAT_REUSE_MATRIX) {
70     PetscBool symm = PETSC_TRUE,isdense;
71     PetscInt  bs;
72 
73     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
74     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
75     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
76     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
77     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
78     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
79     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
80     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
81     if (!isdense) {
82       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
84       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
85     } else {
86       ierr = MatSetUp(B);CHKERRQ(ierr);
87     }
88   } else {
89     B    = *newmat;
90     ierr = MatZeroEntries(B);CHKERRQ(ierr);
91   }
92 
93   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
94   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
95     PetscInt          ncols;
96     const PetscInt    *row;
97     const PetscScalar *vals;
98 
99     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
100     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
101 #if defined(PETSC_USE_COMPLEX)
102     if (A->hermitian) {
103       PetscInt i;
104       for (i = 0; i < ncols; i++) {
105         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
106       }
107     } else {
108       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109     }
110 #else
111     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
112 #endif
113     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
114   }
115   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
116   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
117   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
118 
119   if (reuse == MAT_INPLACE_MATRIX) {
120     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
121   } else {
122     *newmat = B;
123   }
124   PetscFunctionReturn(0);
125 }
126 
127 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
128 {
129   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
150   { \
151     brow = row/bs;  \
152     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
153     rmax = aimax[brow]; nrow = ailen[brow]; \
154     bcol = col/bs; \
155     ridx = row % bs; cidx = col % bs; \
156     low  = 0; high = nrow; \
157     while (high-low > 3) { \
158       t = (low+high)/2; \
159       if (rp[t] > bcol) high = t; \
160       else              low  = t; \
161     } \
162     for (_i=low; _i<high; _i++) { \
163       if (rp[_i] > bcol) break; \
164       if (rp[_i] == bcol) { \
165         bap = ap + bs2*_i + bs*cidx + ridx; \
166         if (addv == ADD_VALUES) *bap += value;  \
167         else                    *bap  = value;  \
168         goto a_noinsert; \
169       } \
170     } \
171     if (a->nonew == 1) goto a_noinsert; \
172     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
173     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
174     N = nrow++ - 1;  \
175     /* shift up all the later entries in this row */ \
176     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
177     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
178     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
179     rp[_i]                      = bcol;  \
180     ap[bs2*_i + bs*cidx + ridx] = value;  \
181     A->nonzerostate++;\
182 a_noinsert:; \
183     ailen[brow] = nrow; \
184   }
185 
186 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
187   { \
188     brow = row/bs;  \
189     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
190     rmax = bimax[brow]; nrow = bilen[brow]; \
191     bcol = col/bs; \
192     ridx = row % bs; cidx = col % bs; \
193     low  = 0; high = nrow; \
194     while (high-low > 3) { \
195       t = (low+high)/2; \
196       if (rp[t] > bcol) high = t; \
197       else              low  = t; \
198     } \
199     for (_i=low; _i<high; _i++) { \
200       if (rp[_i] > bcol) break; \
201       if (rp[_i] == bcol) { \
202         bap = ap + bs2*_i + bs*cidx + ridx; \
203         if (addv == ADD_VALUES) *bap += value;  \
204         else                    *bap  = value;  \
205         goto b_noinsert; \
206       } \
207     } \
208     if (b->nonew == 1) goto b_noinsert; \
209     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
210     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
211     N = nrow++ - 1;  \
212     /* shift up all the later entries in this row */ \
213     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
214     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
215     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
216     rp[_i]                      = bcol;  \
217     ap[bs2*_i + bs*cidx + ridx] = value;  \
218     B->nonzerostate++;\
219 b_noinsert:; \
220     bilen[brow] = nrow; \
221   }
222 
223 /* Only add/insert a(i,j) with i<=j (blocks).
224    Any a(i,j) with i>j input by user is ingored or generates an error
225 */
226 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
227 {
228   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
229   MatScalar      value;
230   PetscBool      roworiented = baij->roworiented;
231   PetscErrorCode ierr;
232   PetscInt       i,j,row,col;
233   PetscInt       rstart_orig=mat->rmap->rstart;
234   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
235   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
236 
237   /* Some Variables required in the macro */
238   Mat          A     = baij->A;
239   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
240   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
241   MatScalar    *aa   =a->a;
242 
243   Mat         B     = baij->B;
244   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
245   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
246   MatScalar   *ba   =b->a;
247 
248   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
249   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
250   MatScalar *ap,*bap;
251 
252   /* for stash */
253   PetscInt  n_loc, *in_loc = NULL;
254   MatScalar *v_loc = NULL;
255 
256   PetscFunctionBegin;
257   if (!baij->donotstash) {
258     if (n > baij->n_loc) {
259       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
260       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
261       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
262       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
263 
264       baij->n_loc = n;
265     }
266     in_loc = baij->in_loc;
267     v_loc  = baij->v_loc;
268   }
269 
270   for (i=0; i<m; i++) {
271     if (im[i] < 0) continue;
272     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
273     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
274       row = im[i] - rstart_orig;              /* local row index */
275       for (j=0; j<n; j++) {
276         if (im[i]/bs > in[j]/bs) {
277           if (a->ignore_ltriangular) {
278             continue;    /* ignore lower triangular blocks */
279           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
280         }
281         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
282           col  = in[j] - cstart_orig;         /* local col index */
283           brow = row/bs; bcol = col/bs;
284           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
285           if (roworiented) value = v[i*n+j];
286           else             value = v[i+j*m];
287           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
288           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
289         } else if (in[j] < 0) continue;
290         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291         else {  /* off-diag entry (B) */
292           if (mat->was_assembled) {
293             if (!baij->colmap) {
294               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
295             }
296 #if defined(PETSC_USE_CTABLE)
297             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
298             col  = col - 1;
299 #else
300             col = baij->colmap[in[j]/bs] - 1;
301 #endif
302             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
303               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
304               col  =  in[j];
305               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
306               B    = baij->B;
307               b    = (Mat_SeqBAIJ*)(B)->data;
308               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
309               ba   = b->a;
310             } else col += in[j]%bs;
311           } else col = in[j];
312           if (roworiented) value = v[i*n+j];
313           else             value = v[i+j*m];
314           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
315           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
316         }
317       }
318     } else {  /* off processor entry */
319       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
320       if (!baij->donotstash) {
321         mat->assembled = PETSC_FALSE;
322         n_loc          = 0;
323         for (j=0; j<n; j++) {
324           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
325           in_loc[n_loc] = in[j];
326           if (roworiented) {
327             v_loc[n_loc] = v[i*n+j];
328           } else {
329             v_loc[n_loc] = v[j*m+i];
330           }
331           n_loc++;
332         }
333         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
334       }
335     }
336   }
337   PetscFunctionReturn(0);
338 }
339 
340 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
341 {
342   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
343   PetscErrorCode    ierr;
344   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
345   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
346   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
347   PetscBool         roworiented=a->roworiented;
348   const PetscScalar *value     = v;
349   MatScalar         *ap,*aa = a->a,*bap;
350 
351   PetscFunctionBegin;
352   if (col < row) {
353     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
354     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
355   }
356   rp   = aj + ai[row];
357   ap   = aa + bs2*ai[row];
358   rmax = imax[row];
359   nrow = ailen[row];
360   value = v;
361   low   = 0;
362   high  = nrow;
363 
364   while (high-low > 7) {
365     t = (low+high)/2;
366     if (rp[t] > col) high = t;
367     else             low  = t;
368   }
369   for (i=low; i<high; i++) {
370     if (rp[i] > col) break;
371     if (rp[i] == col) {
372       bap = ap +  bs2*i;
373       if (roworiented) {
374         if (is == ADD_VALUES) {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] += *value++;
378             }
379           }
380         } else {
381           for (ii=0; ii<bs; ii++) {
382             for (jj=ii; jj<bs2; jj+=bs) {
383               bap[jj] = *value++;
384             }
385           }
386         }
387       } else {
388         if (is == ADD_VALUES) {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++ += *value++;
392             }
393           }
394         } else {
395           for (ii=0; ii<bs; ii++) {
396             for (jj=0; jj<bs; jj++) {
397               *bap++  = *value++;
398             }
399           }
400         }
401       }
402       goto noinsert2;
403     }
404   }
405   if (nonew == 1) goto noinsert2;
406   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
407   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
408   N = nrow++ - 1; high++;
409   /* shift up all the later entries in this row */
410   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
411   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
412   rp[i] = col;
413   bap   = ap +  bs2*i;
414   if (roworiented) {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=ii; jj<bs2; jj+=bs) {
417         bap[jj] = *value++;
418       }
419     }
420   } else {
421     for (ii=0; ii<bs; ii++) {
422       for (jj=0; jj<bs; jj++) {
423         *bap++ = *value++;
424       }
425     }
426   }
427   noinsert2:;
428   ailen[row] = nrow;
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433    This routine is exactly duplicated in mpibaij.c
434 */
435 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
436 {
437   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
438   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
439   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
440   PetscErrorCode    ierr;
441   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
442   PetscBool         roworiented=a->roworiented;
443   const PetscScalar *value     = v;
444   MatScalar         *ap,*aa = a->a,*bap;
445 
446   PetscFunctionBegin;
447   rp   = aj + ai[row];
448   ap   = aa + bs2*ai[row];
449   rmax = imax[row];
450   nrow = ailen[row];
451   low  = 0;
452   high = nrow;
453   value = v;
454   while (high-low > 7) {
455     t = (low+high)/2;
456     if (rp[t] > col) high = t;
457     else             low  = t;
458   }
459   for (i=low; i<high; i++) {
460     if (rp[i] > col) break;
461     if (rp[i] == col) {
462       bap = ap +  bs2*i;
463       if (roworiented) {
464         if (is == ADD_VALUES) {
465           for (ii=0; ii<bs; ii++) {
466             for (jj=ii; jj<bs2; jj+=bs) {
467               bap[jj] += *value++;
468             }
469           }
470         } else {
471           for (ii=0; ii<bs; ii++) {
472             for (jj=ii; jj<bs2; jj+=bs) {
473               bap[jj] = *value++;
474             }
475           }
476         }
477       } else {
478         if (is == ADD_VALUES) {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj] += value[jj];
482             }
483             bap += bs;
484           }
485         } else {
486           for (ii=0; ii<bs; ii++,value+=bs) {
487             for (jj=0; jj<bs; jj++) {
488               bap[jj]  = value[jj];
489             }
490             bap += bs;
491           }
492         }
493       }
494       goto noinsert2;
495     }
496   }
497   if (nonew == 1) goto noinsert2;
498   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
499   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
500   N = nrow++ - 1; high++;
501   /* shift up all the later entries in this row */
502   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
503   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
504   rp[i] = col;
505   bap   = ap +  bs2*i;
506   if (roworiented) {
507     for (ii=0; ii<bs; ii++) {
508       for (jj=ii; jj<bs2; jj+=bs) {
509         bap[jj] = *value++;
510       }
511     }
512   } else {
513     for (ii=0; ii<bs; ii++) {
514       for (jj=0; jj<bs; jj++) {
515         *bap++ = *value++;
516       }
517     }
518   }
519   noinsert2:;
520   ailen[row] = nrow;
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This routine could be optimized by removing the need for the block copy below and passing stride information
526   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
527 */
528 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
529 {
530   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
531   const MatScalar *value;
532   MatScalar       *barray     =baij->barray;
533   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
534   PetscErrorCode  ierr;
535   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
536   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
537   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
538 
539   PetscFunctionBegin;
540   if (!barray) {
541     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
542     baij->barray = barray;
543   }
544 
545   if (roworiented) {
546     stepval = (n-1)*bs;
547   } else {
548     stepval = (m-1)*bs;
549   }
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
553     if (im[i] >= rstart && im[i] < rend) {
554       row = im[i] - rstart;
555       for (j=0; j<n; j++) {
556         if (im[i] > in[j]) {
557           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
558           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
559         }
560         /* If NumCol = 1 then a copy is not required */
561         if ((roworiented) && (n == 1)) {
562           barray = (MatScalar*) v + i*bs2;
563         } else if ((!roworiented) && (m == 1)) {
564           barray = (MatScalar*) v + j*bs2;
565         } else { /* Here a copy is required */
566           if (roworiented) {
567             value = v + i*(stepval+bs)*bs + j*bs;
568           } else {
569             value = v + j*(stepval+bs)*bs + i*bs;
570           }
571           for (ii=0; ii<bs; ii++,value+=stepval) {
572             for (jj=0; jj<bs; jj++) {
573               *barray++ = *value++;
574             }
575           }
576           barray -=bs2;
577         }
578 
579         if (in[j] >= cstart && in[j] < cend) {
580           col  = in[j] - cstart;
581           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
582         } else if (in[j] < 0) continue;
583         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584         else {
585           if (mat->was_assembled) {
586             if (!baij->colmap) {
587               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 
590 #if defined(PETSC_USE_DEBUG)
591 #if defined(PETSC_USE_CTABLE)
592             { PetscInt data;
593               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
594               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
595             }
596 #else
597             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
598 #endif
599 #endif
600 #if defined(PETSC_USE_CTABLE)
601             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
602             col  = (col - 1)/bs;
603 #else
604             col = (baij->colmap[in[j]] - 1)/bs;
605 #endif
606             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
607               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
608               col  = in[j];
609             }
610           } else col = in[j];
611           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!baij->donotstash) {
617         if (roworiented) {
618           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
670 {
671   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
672   PetscErrorCode ierr;
673   PetscReal      sum[2],*lnorm2;
674 
675   PetscFunctionBegin;
676   if (baij->size == 1) {
677     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
678   } else {
679     if (type == NORM_FROBENIUS) {
680       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
681       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
682       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
683       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
684       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
685       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
686       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
687       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
688     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
689       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
690       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
691       PetscReal    *rsum,*rsum2,vabs;
692       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
693       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
694       MatScalar    *v;
695 
696       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
697       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
698       /* Amat */
699       v = amat->a; jj = amat->j;
700       for (brow=0; brow<mbs; brow++) {
701         grow = bs*(rstart + brow);
702         nz   = amat->i[brow+1] - amat->i[brow];
703         for (bcol=0; bcol<nz; bcol++) {
704           gcol = bs*(rstart + *jj); jj++;
705           for (col=0; col<bs; col++) {
706             for (row=0; row<bs; row++) {
707               vabs            = PetscAbsScalar(*v); v++;
708               rsum[gcol+col] += vabs;
709               /* non-diagonal block */
710               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
711             }
712           }
713         }
714         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
715       }
716       /* Bmat */
717       v = bmat->a; jj = bmat->j;
718       for (brow=0; brow<mbs; brow++) {
719         grow = bs*(rstart + brow);
720         nz = bmat->i[brow+1] - bmat->i[brow];
721         for (bcol=0; bcol<nz; bcol++) {
722           gcol = bs*garray[*jj]; jj++;
723           for (col=0; col<bs; col++) {
724             for (row=0; row<bs; row++) {
725               vabs            = PetscAbsScalar(*v); v++;
726               rsum[gcol+col] += vabs;
727               rsum[grow+row] += vabs;
728             }
729           }
730         }
731         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
732       }
733       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734       *norm = 0.0;
735       for (col=0; col<mat->cmap->N; col++) {
736         if (rsum2[col] > *norm) *norm = rsum2[col];
737       }
738       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
739     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       nstash,reallocs;
749 
750   PetscFunctionBegin;
751   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752 
753   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
757   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
758   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
763 {
764   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
765   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
766   PetscErrorCode ierr;
767   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
768   PetscInt       *row,*col;
769   PetscBool      other_disassembled;
770   PetscMPIInt    n;
771   PetscBool      r1,r2,r3;
772   MatScalar      *val;
773 
774   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
775   PetscFunctionBegin;
776   if (!baij->donotstash &&  !mat->nooffprocentries) {
777     while (1) {
778       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
779       if (!flg) break;
780 
781       for (i=0; i<n;) {
782         /* Now identify the consecutive vals belonging to the same row */
783         for (j=i,rstart=row[j]; j<n; j++) {
784           if (row[j] != rstart) break;
785         }
786         if (j < n) ncols = j-i;
787         else       ncols = n-i;
788         /* Now assemble all these values with a single function call */
789         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
790         i    = j;
791       }
792     }
793     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
794     /* Now process the block-stash. Since the values are stashed column-oriented,
795        set the roworiented flag to column oriented, and after MatSetValues()
796        restore the original flags */
797     r1 = baij->roworiented;
798     r2 = a->roworiented;
799     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
800 
801     baij->roworiented = PETSC_FALSE;
802     a->roworiented    = PETSC_FALSE;
803 
804     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
805     while (1) {
806       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
807       if (!flg) break;
808 
809       for (i=0; i<n;) {
810         /* Now identify the consecutive vals belonging to the same row */
811         for (j=i,rstart=row[j]; j<n; j++) {
812           if (row[j] != rstart) break;
813         }
814         if (j < n) ncols = j-i;
815         else       ncols = n-i;
816         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
817         i    = j;
818       }
819     }
820     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
821 
822     baij->roworiented = r1;
823     a->roworiented    = r2;
824 
825     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
826   }
827 
828   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
830 
831   /* determine if any processor has disassembled, if so we must
832      also disassemble ourselfs, in order that we may reassemble. */
833   /*
834      if nonzero structure of submatrix B cannot change then we know that
835      no processor disassembled thus we can skip this stuff
836   */
837   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
838     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
839     if (mat->was_assembled && !other_disassembled) {
840       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
841     }
842   }
843 
844   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
845     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
846   }
847   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
848   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
849 
850   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
851 
852   baij->rowvalues = NULL;
853 
854   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
855   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
856     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
857     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
864 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
865 {
866   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
867   PetscErrorCode    ierr;
868   PetscInt          bs   = mat->rmap->bs;
869   PetscMPIInt       rank = baij->rank;
870   PetscBool         iascii,isdraw;
871   PetscViewer       sviewer;
872   PetscViewerFormat format;
873 
874   PetscFunctionBegin;
875   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
876   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
877   if (iascii) {
878     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
879     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
880       MatInfo info;
881       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
882       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
883       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
884       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
885       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
886       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
887       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
889       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
890       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
891       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
892       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
893       PetscFunctionReturn(0);
894     } else if (format == PETSC_VIEWER_ASCII_INFO) {
895       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
896       PetscFunctionReturn(0);
897     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
898       PetscFunctionReturn(0);
899     }
900   }
901 
902   if (isdraw) {
903     PetscDraw draw;
904     PetscBool isnull;
905     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
906     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
907     if (isnull) PetscFunctionReturn(0);
908   }
909 
910   {
911     /* assemble the entire matrix onto first processor. */
912     Mat          A;
913     Mat_SeqSBAIJ *Aloc;
914     Mat_SeqBAIJ  *Bloc;
915     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
916     MatScalar    *a;
917     const char   *matname;
918 
919     /* Should this be the same type as mat? */
920     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
921     if (!rank) {
922       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
923     } else {
924       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
925     }
926     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
927     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
928     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
929     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
930 
931     /* copy over the A part */
932     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
933     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
934     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
935 
936     for (i=0; i<mbs; i++) {
937       rvals[0] = bs*(baij->rstartbs + i);
938       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
939       for (j=ai[i]; j<ai[i+1]; j++) {
940         col = (baij->cstartbs+aj[j])*bs;
941         for (k=0; k<bs; k++) {
942           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
943           col++;
944           a += bs;
945         }
946       }
947     }
948     /* copy over the B part */
949     Bloc = (Mat_SeqBAIJ*)baij->B->data;
950     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
951     for (i=0; i<mbs; i++) {
952 
953       rvals[0] = bs*(baij->rstartbs + i);
954       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
955       for (j=ai[i]; j<ai[i+1]; j++) {
956         col = baij->garray[aj[j]]*bs;
957         for (k=0; k<bs; k++) {
958           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
959           col++;
960           a += bs;
961         }
962       }
963     }
964     ierr = PetscFree(rvals);CHKERRQ(ierr);
965     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     /*
968        Everyone has to call to draw the matrix since the graphics waits are
969        synchronized across all processors that share the PetscDraw object
970     */
971     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
972     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
973     if (!rank) {
974       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
975       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
976     }
977     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
978     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979     ierr = MatDestroy(&A);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986 
987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989   PetscErrorCode ierr;
990   PetscBool      iascii,isdraw,issocket,isbinary;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997   if (iascii || isdraw || issocket) {
998     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999   } else if (isbinary) {
1000     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1006 {
1007   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011 #if defined(PETSC_USE_LOG)
1012   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1013 #endif
1014   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1015   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1016   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1017   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1018 #if defined(PETSC_USE_CTABLE)
1019   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1020 #else
1021   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1022 #endif
1023   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1024   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1025   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1026   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1027   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1028   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1029   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1030   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1031   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1032   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1033   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1034   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1035   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1036   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1037   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1038 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1039   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1040 #endif
1041   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1042   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1043   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1044   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1045 
1046   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1047   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1048   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1049   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1050   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1051 #if defined(PETSC_HAVE_ELEMENTAL)
1052   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1053 #endif
1054 #if defined(PETSC_HAVE_SCALAPACK)
1055   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
1056 #endif
1057   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1058   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1063 {
1064   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1065   PetscErrorCode    ierr;
1066   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1067   PetscScalar       *from;
1068   const PetscScalar *x;
1069 
1070   PetscFunctionBegin;
1071   /* diagonal part */
1072   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1073   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1074 
1075   /* subdiagonal part */
1076   if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1077   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1078 
1079   /* copy x into the vec slvec0 */
1080   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1081   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1082 
1083   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1084   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1085   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1086 
1087   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   /* supperdiagonal part */
1090   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1095 {
1096   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1097   PetscErrorCode    ierr;
1098   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1099   PetscScalar       *from;
1100   const PetscScalar *x;
1101 
1102   PetscFunctionBegin;
1103   /* diagonal part */
1104   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1105   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1106 
1107   /* subdiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1109 
1110   /* copy x into the vec slvec0 */
1111   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1112   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1113 
1114   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1115   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1116   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1117 
1118   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120   /* supperdiagonal part */
1121   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode    ierr;
1129   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1130   PetscScalar       *from,zero=0.0;
1131   const PetscScalar *x;
1132 
1133   PetscFunctionBegin;
1134   /* diagonal part */
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1136   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1137 
1138   /* subdiagonal part */
1139   if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1140   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1141 
1142   /* copy x into the vec slvec0 */
1143   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1144   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1145   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1146   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1147 
1148   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1150   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151 
1152   /* supperdiagonal part */
1153   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1158 {
1159   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode    ierr;
1161   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1162   PetscScalar       *from,zero=0.0;
1163   const PetscScalar *x;
1164 
1165   PetscFunctionBegin;
1166   /* diagonal part */
1167   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1168   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1169 
1170   /* subdiagonal part */
1171   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1172 
1173   /* copy x into the vec slvec0 */
1174   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1175   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1177   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1178 
1179   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1180   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1181   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182 
1183   /* supperdiagonal part */
1184   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /*
1189   This only works correctly for square matrices where the subblock A->A is the
1190    diagonal block
1191 */
1192 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1193 {
1194   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1199   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1204 {
1205   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1210   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1215 {
1216   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1217   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1218   PetscErrorCode ierr;
1219   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1220   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1221   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1222 
1223   PetscFunctionBegin;
1224   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1225   mat->getrowactive = PETSC_TRUE;
1226 
1227   if (!mat->rowvalues && (idx || v)) {
1228     /*
1229         allocate enough space to hold information from the longest row.
1230     */
1231     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1232     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1233     PetscInt     max = 1,mbs = mat->mbs,tmp;
1234     for (i=0; i<mbs; i++) {
1235       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1236       if (max < tmp) max = tmp;
1237     }
1238     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1239   }
1240 
1241   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1242   lrow = row - brstart;  /* local row index */
1243 
1244   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1245   if (!v)   {pvA = NULL; pvB = NULL;}
1246   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1247   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1248   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1249   nztot = nzA + nzB;
1250 
1251   cmap = mat->garray;
1252   if (v  || idx) {
1253     if (nztot) {
1254       /* Sort by increasing column numbers, assuming A and B already sorted */
1255       PetscInt imark = -1;
1256       if (v) {
1257         *v = v_p = mat->rowvalues;
1258         for (i=0; i<nzB; i++) {
1259           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1260           else break;
1261         }
1262         imark = i;
1263         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1264         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1265       }
1266       if (idx) {
1267         *idx = idx_p = mat->rowindices;
1268         if (imark > -1) {
1269           for (i=0; i<imark; i++) {
1270             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1271           }
1272         } else {
1273           for (i=0; i<nzB; i++) {
1274             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1275             else break;
1276           }
1277           imark = i;
1278         }
1279         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1280         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1281       }
1282     } else {
1283       if (idx) *idx = NULL;
1284       if (v)   *v   = NULL;
1285     }
1286   }
1287   *nz  = nztot;
1288   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1289   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1296 
1297   PetscFunctionBegin;
1298   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1299   baij->getrowactive = PETSC_FALSE;
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1306   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1307 
1308   PetscFunctionBegin;
1309   aA->getrow_utriangular = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1315   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1316 
1317   PetscFunctionBegin;
1318   aA->getrow_utriangular = PETSC_FALSE;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1323 {
1324 #if defined(PETSC_USE_COMPLEX)
1325   PetscErrorCode ierr;
1326   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
1327 
1328   PetscFunctionBegin;
1329   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1330   ierr = MatConjugate(a->B);CHKERRQ(ierr);
1331 #else
1332   PetscFunctionBegin;
1333 #endif
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1338 {
1339   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1344   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1349 {
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1355   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1360    Input: isrow       - distributed(parallel),
1361           iscol_local - locally owned (seq)
1362 */
1363 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1364 {
1365   PetscErrorCode ierr;
1366   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1367   const PetscInt *ptr1,*ptr2;
1368 
1369   PetscFunctionBegin;
1370   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1371   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1372   if (sz1 > sz2) {
1373     *flg = PETSC_FALSE;
1374     PetscFunctionReturn(0);
1375   }
1376 
1377   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1378   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1379 
1380   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1381   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1382   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1383   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1384   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1385   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1386 
1387   nmatch=0;
1388   k     = 0;
1389   for (i=0; i<sz1; i++){
1390     for (j=k; j<sz2; j++){
1391       if (a1[i] == a2[j]) {
1392         k = j; nmatch++;
1393         break;
1394       }
1395     }
1396   }
1397   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1398   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1399   ierr = PetscFree(a1);CHKERRQ(ierr);
1400   ierr = PetscFree(a2);CHKERRQ(ierr);
1401   if (nmatch < sz1) {
1402     *flg = PETSC_FALSE;
1403   } else {
1404     *flg = PETSC_TRUE;
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1410 {
1411   PetscErrorCode ierr;
1412   IS             iscol_local;
1413   PetscInt       csize;
1414   PetscBool      isequal;
1415 
1416   PetscFunctionBegin;
1417   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1418   if (call == MAT_REUSE_MATRIX) {
1419     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1420     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1421   } else {
1422     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1423     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1424     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1425   }
1426 
1427   /* now call MatCreateSubMatrix_MPIBAIJ() */
1428   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1429   if (call == MAT_INITIAL_MATRIX) {
1430     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1431     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1432   }
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1437 {
1438   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1439   PetscErrorCode ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1443   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1448 {
1449   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1450   Mat            A  = a->A,B = a->B;
1451   PetscErrorCode ierr;
1452   PetscLogDouble isend[5],irecv[5];
1453 
1454   PetscFunctionBegin;
1455   info->block_size = (PetscReal)matin->rmap->bs;
1456 
1457   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1458 
1459   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1460   isend[3] = info->memory;  isend[4] = info->mallocs;
1461 
1462   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1463 
1464   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1465   isend[3] += info->memory;  isend[4] += info->mallocs;
1466   if (flag == MAT_LOCAL) {
1467     info->nz_used      = isend[0];
1468     info->nz_allocated = isend[1];
1469     info->nz_unneeded  = isend[2];
1470     info->memory       = isend[3];
1471     info->mallocs      = isend[4];
1472   } else if (flag == MAT_GLOBAL_MAX) {
1473     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1474 
1475     info->nz_used      = irecv[0];
1476     info->nz_allocated = irecv[1];
1477     info->nz_unneeded  = irecv[2];
1478     info->memory       = irecv[3];
1479     info->mallocs      = irecv[4];
1480   } else if (flag == MAT_GLOBAL_SUM) {
1481     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1482 
1483     info->nz_used      = irecv[0];
1484     info->nz_allocated = irecv[1];
1485     info->nz_unneeded  = irecv[2];
1486     info->memory       = irecv[3];
1487     info->mallocs      = irecv[4];
1488   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1489   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1490   info->fill_ratio_needed = 0;
1491   info->factor_mallocs    = 0;
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1496 {
1497   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1498   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   switch (op) {
1503   case MAT_NEW_NONZERO_LOCATIONS:
1504   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1505   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1506   case MAT_KEEP_NONZERO_PATTERN:
1507   case MAT_SUBMAT_SINGLEIS:
1508   case MAT_NEW_NONZERO_LOCATION_ERR:
1509     MatCheckPreallocated(A,1);
1510     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1511     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1512     break;
1513   case MAT_ROW_ORIENTED:
1514     MatCheckPreallocated(A,1);
1515     a->roworiented = flg;
1516 
1517     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1518     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1519     break;
1520   case MAT_FORCE_DIAGONAL_ENTRIES:
1521   case MAT_SORTED_FULL:
1522     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1523     break;
1524   case MAT_IGNORE_OFF_PROC_ENTRIES:
1525     a->donotstash = flg;
1526     break;
1527   case MAT_USE_HASH_TABLE:
1528     a->ht_flag = flg;
1529     break;
1530   case MAT_HERMITIAN:
1531     MatCheckPreallocated(A,1);
1532     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1533 #if defined(PETSC_USE_COMPLEX)
1534     if (flg) { /* need different mat-vec ops */
1535       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1536       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1537       A->ops->multtranspose    = NULL;
1538       A->ops->multtransposeadd = NULL;
1539       A->symmetric = PETSC_FALSE;
1540     }
1541 #endif
1542     break;
1543   case MAT_SPD:
1544   case MAT_SYMMETRIC:
1545     MatCheckPreallocated(A,1);
1546     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1547 #if defined(PETSC_USE_COMPLEX)
1548     if (flg) { /* restore to use default mat-vec ops */
1549       A->ops->mult             = MatMult_MPISBAIJ;
1550       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1551       A->ops->multtranspose    = MatMult_MPISBAIJ;
1552       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1553     }
1554 #endif
1555     break;
1556   case MAT_STRUCTURALLY_SYMMETRIC:
1557     MatCheckPreallocated(A,1);
1558     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1559     break;
1560   case MAT_SYMMETRY_ETERNAL:
1561     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1562     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1563     break;
1564   case MAT_IGNORE_LOWER_TRIANGULAR:
1565     aA->ignore_ltriangular = flg;
1566     break;
1567   case MAT_ERROR_LOWER_TRIANGULAR:
1568     aA->ignore_ltriangular = flg;
1569     break;
1570   case MAT_GETROW_UPPERTRIANGULAR:
1571     aA->getrow_utriangular = flg;
1572     break;
1573   default:
1574     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1575   }
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1580 {
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   if (reuse == MAT_INITIAL_MATRIX) {
1585     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1586   }  else if (reuse == MAT_REUSE_MATRIX) {
1587     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1593 {
1594   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1595   Mat            a     = baij->A, b=baij->B;
1596   PetscErrorCode ierr;
1597   PetscInt       nv,m,n;
1598   PetscBool      flg;
1599 
1600   PetscFunctionBegin;
1601   if (ll != rr) {
1602     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1603     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1604   }
1605   if (!ll) PetscFunctionReturn(0);
1606 
1607   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1608   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1609 
1610   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1611   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1612 
1613   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614 
1615   /* left diagonalscale the off-diagonal part */
1616   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1617 
1618   /* scale the diagonal part */
1619   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1620 
1621   /* right diagonalscale the off-diagonal part */
1622   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1628 {
1629   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1630   PetscErrorCode ierr;
1631 
1632   PetscFunctionBegin;
1633   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1638 
1639 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1640 {
1641   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1642   Mat            a,b,c,d;
1643   PetscBool      flg;
1644   PetscErrorCode ierr;
1645 
1646   PetscFunctionBegin;
1647   a = matA->A; b = matA->B;
1648   c = matB->A; d = matB->B;
1649 
1650   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1651   if (flg) {
1652     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1653   }
1654   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1659 {
1660   PetscErrorCode ierr;
1661   PetscBool      isbaij;
1662 
1663   PetscFunctionBegin;
1664   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1665   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1666   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1667   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1668     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1669     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1670     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1671   } else {
1672     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1673     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1674 
1675     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1676     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1677   }
1678   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1683 {
1684   PetscErrorCode ierr;
1685 
1686   PetscFunctionBegin;
1687   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1692 {
1693   PetscErrorCode ierr;
1694   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1695   PetscBLASInt   bnz,one=1;
1696   Mat_SeqSBAIJ   *xa,*ya;
1697   Mat_SeqBAIJ    *xb,*yb;
1698 
1699   PetscFunctionBegin;
1700   if (str == SAME_NONZERO_PATTERN) {
1701     PetscScalar alpha = a;
1702     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1703     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1704     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1705     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1706     xb   = (Mat_SeqBAIJ*)xx->B->data;
1707     yb   = (Mat_SeqBAIJ*)yy->B->data;
1708     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1709     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1710     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1711   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1712     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1713     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1714     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1715   } else {
1716     Mat      B;
1717     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1718     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1719     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1720     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1721     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1722     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1723     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1724     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1725     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1726     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1727     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1728     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1729     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1730     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1731     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1732     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1733     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1734     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1735     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1736     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1742 {
1743   PetscErrorCode ierr;
1744   PetscInt       i;
1745   PetscBool      flg;
1746 
1747   PetscFunctionBegin;
1748   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1749   for (i=0; i<n; i++) {
1750     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1751     if (!flg) {
1752       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1753     }
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1759 {
1760   PetscErrorCode ierr;
1761   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1762   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1763 
1764   PetscFunctionBegin;
1765   if (!Y->preallocated) {
1766     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1767   } else if (!aij->nz) {
1768     PetscInt nonew = aij->nonew;
1769     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1770     aij->nonew = nonew;
1771   }
1772   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1777 {
1778   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1783   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1784   if (d) {
1785     PetscInt rstart;
1786     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1787     *d += rstart/A->rmap->bs;
1788 
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1794 {
1795   PetscFunctionBegin;
1796   *a = ((Mat_MPISBAIJ*)A->data)->A;
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 /* -------------------------------------------------------------------*/
1801 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1802                                        MatGetRow_MPISBAIJ,
1803                                        MatRestoreRow_MPISBAIJ,
1804                                        MatMult_MPISBAIJ,
1805                                /*  4*/ MatMultAdd_MPISBAIJ,
1806                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1807                                        MatMultAdd_MPISBAIJ,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                /* 10*/ NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        MatSOR_MPISBAIJ,
1815                                        MatTranspose_MPISBAIJ,
1816                                /* 15*/ MatGetInfo_MPISBAIJ,
1817                                        MatEqual_MPISBAIJ,
1818                                        MatGetDiagonal_MPISBAIJ,
1819                                        MatDiagonalScale_MPISBAIJ,
1820                                        MatNorm_MPISBAIJ,
1821                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1822                                        MatAssemblyEnd_MPISBAIJ,
1823                                        MatSetOption_MPISBAIJ,
1824                                        MatZeroEntries_MPISBAIJ,
1825                                /* 24*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                /* 29*/ MatSetUp_MPISBAIJ,
1831                                        NULL,
1832                                        NULL,
1833                                        MatGetDiagonalBlock_MPISBAIJ,
1834                                        NULL,
1835                                /* 34*/ MatDuplicate_MPISBAIJ,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                /* 39*/ MatAXPY_MPISBAIJ,
1841                                        MatCreateSubMatrices_MPISBAIJ,
1842                                        MatIncreaseOverlap_MPISBAIJ,
1843                                        MatGetValues_MPISBAIJ,
1844                                        MatCopy_MPISBAIJ,
1845                                /* 44*/ NULL,
1846                                        MatScale_MPISBAIJ,
1847                                        MatShift_MPISBAIJ,
1848                                        NULL,
1849                                        NULL,
1850                                /* 49*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                /* 54*/ NULL,
1856                                        NULL,
1857                                        MatSetUnfactored_MPISBAIJ,
1858                                        NULL,
1859                                        MatSetValuesBlocked_MPISBAIJ,
1860                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1861                                        NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                /* 64*/ NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        NULL,
1870                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1871                                        NULL,
1872                                        MatConvert_MPISBAIJ_Basic,
1873                                        NULL,
1874                                        NULL,
1875                                /* 74*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                /* 79*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        MatLoad_MPISBAIJ,
1885                                /* 84*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                /* 89*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                /* 94*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                /* 99*/ NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        MatConjugate_MPISBAIJ,
1904                                        NULL,
1905                                /*104*/ NULL,
1906                                        MatRealPart_MPISBAIJ,
1907                                        MatImaginaryPart_MPISBAIJ,
1908                                        MatGetRowUpperTriangular_MPISBAIJ,
1909                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1910                                /*109*/ NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        NULL,
1914                                        MatMissingDiagonal_MPISBAIJ,
1915                                /*114*/ NULL,
1916                                        NULL,
1917                                        NULL,
1918                                        NULL,
1919                                        NULL,
1920                                /*119*/ NULL,
1921                                        NULL,
1922                                        NULL,
1923                                        NULL,
1924                                        NULL,
1925                                /*124*/ NULL,
1926                                        NULL,
1927                                        NULL,
1928                                        NULL,
1929                                        NULL,
1930                                /*129*/ NULL,
1931                                        NULL,
1932                                        NULL,
1933                                        NULL,
1934                                        NULL,
1935                                /*134*/ NULL,
1936                                        NULL,
1937                                        NULL,
1938                                        NULL,
1939                                        NULL,
1940                                /*139*/ MatSetBlockSizes_Default,
1941                                        NULL,
1942                                        NULL,
1943                                        NULL,
1944                                        NULL,
1945                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1946 };
1947 
1948 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1949 {
1950   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1951   PetscErrorCode ierr;
1952   PetscInt       i,mbs,Mbs;
1953   PetscMPIInt    size;
1954 
1955   PetscFunctionBegin;
1956   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1957   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1958   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1959   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1960   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1961   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1962 
1963   mbs = B->rmap->n/bs;
1964   Mbs = B->rmap->N/bs;
1965   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1966 
1967   B->rmap->bs = bs;
1968   b->bs2      = bs*bs;
1969   b->mbs      = mbs;
1970   b->Mbs      = Mbs;
1971   b->nbs      = B->cmap->n/bs;
1972   b->Nbs      = B->cmap->N/bs;
1973 
1974   for (i=0; i<=b->size; i++) {
1975     b->rangebs[i] = B->rmap->range[i]/bs;
1976   }
1977   b->rstartbs = B->rmap->rstart/bs;
1978   b->rendbs   = B->rmap->rend/bs;
1979 
1980   b->cstartbs = B->cmap->rstart/bs;
1981   b->cendbs   = B->cmap->rend/bs;
1982 
1983 #if defined(PETSC_USE_CTABLE)
1984   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
1985 #else
1986   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
1987 #endif
1988   ierr = PetscFree(b->garray);CHKERRQ(ierr);
1989   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
1990   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
1991   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
1992   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
1993   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
1994   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
1995   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
1996   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
1997 
1998   /* Because the B will have been resized we simply destroy it and create a new one each time */
1999   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2000   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2001   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2002   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2003   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2004   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2005 
2006   if (!B->preallocated) {
2007     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2008     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2009     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2010     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2011     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2012   }
2013 
2014   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2015   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2016 
2017   B->preallocated  = PETSC_TRUE;
2018   B->was_assembled = PETSC_FALSE;
2019   B->assembled     = PETSC_FALSE;
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2024 {
2025   PetscInt       m,rstart,cend;
2026   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2027   const PetscInt *JJ    =NULL;
2028   PetscScalar    *values=NULL;
2029   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2030   PetscErrorCode ierr;
2031   PetscBool      nooffprocentries;
2032 
2033   PetscFunctionBegin;
2034   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2035   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2036   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2037   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2038   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2039   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2040   m      = B->rmap->n/bs;
2041   rstart = B->rmap->rstart/bs;
2042   cend   = B->cmap->rend/bs;
2043 
2044   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2045   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2046   for (i=0; i<m; i++) {
2047     nz = ii[i+1] - ii[i];
2048     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2049     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2050     JJ     = jj + ii[i];
2051     bd     = 0;
2052     for (j=0; j<nz; j++) {
2053       if (*JJ >= i + rstart) break;
2054       JJ++;
2055       bd++;
2056     }
2057     d  = 0;
2058     for (; j<nz; j++) {
2059       if (*JJ++ >= cend) break;
2060       d++;
2061     }
2062     d_nnz[i] = d;
2063     o_nnz[i] = nz - d - bd;
2064     nz       = nz - bd;
2065     nz_max = PetscMax(nz_max,nz);
2066   }
2067   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2068   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2069   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2070 
2071   values = (PetscScalar*)V;
2072   if (!values) {
2073     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2074   }
2075   for (i=0; i<m; i++) {
2076     PetscInt          row    = i + rstart;
2077     PetscInt          ncols  = ii[i+1] - ii[i];
2078     const PetscInt    *icols = jj + ii[i];
2079     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2080       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2081       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2082     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2083       PetscInt j;
2084       for (j=0; j<ncols; j++) {
2085         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2086         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2087       }
2088     }
2089   }
2090 
2091   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2092   nooffprocentries    = B->nooffprocentries;
2093   B->nooffprocentries = PETSC_TRUE;
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   B->nooffprocentries = nooffprocentries;
2097 
2098   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2099   PetscFunctionReturn(0);
2100 }
2101 
2102 /*MC
2103    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2104    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2105    the matrix is stored.
2106 
2107    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2108    can call MatSetOption(Mat, MAT_HERMITIAN);
2109 
2110    Options Database Keys:
2111 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2112 
2113    Notes:
2114      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2115      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2116 
2117    Level: beginner
2118 
2119 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2120 M*/
2121 
2122 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2123 {
2124   Mat_MPISBAIJ   *b;
2125   PetscErrorCode ierr;
2126   PetscBool      flg = PETSC_FALSE;
2127 
2128   PetscFunctionBegin;
2129   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2130   B->data = (void*)b;
2131   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2132 
2133   B->ops->destroy = MatDestroy_MPISBAIJ;
2134   B->ops->view    = MatView_MPISBAIJ;
2135   B->assembled    = PETSC_FALSE;
2136   B->insertmode   = NOT_SET_VALUES;
2137 
2138   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
2139   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2140 
2141   /* build local table of row and column ownerships */
2142   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2143 
2144   /* build cache for off array entries formed */
2145   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2146 
2147   b->donotstash  = PETSC_FALSE;
2148   b->colmap      = NULL;
2149   b->garray      = NULL;
2150   b->roworiented = PETSC_TRUE;
2151 
2152   /* stuff used in block assembly */
2153   b->barray = NULL;
2154 
2155   /* stuff used for matrix vector multiply */
2156   b->lvec    = NULL;
2157   b->Mvctx   = NULL;
2158   b->slvec0  = NULL;
2159   b->slvec0b = NULL;
2160   b->slvec1  = NULL;
2161   b->slvec1a = NULL;
2162   b->slvec1b = NULL;
2163   b->sMvctx  = NULL;
2164 
2165   /* stuff for MatGetRow() */
2166   b->rowindices   = NULL;
2167   b->rowvalues    = NULL;
2168   b->getrowactive = PETSC_FALSE;
2169 
2170   /* hash table stuff */
2171   b->ht           = NULL;
2172   b->hd           = NULL;
2173   b->ht_size      = 0;
2174   b->ht_flag      = PETSC_FALSE;
2175   b->ht_fact      = 0;
2176   b->ht_total_ct  = 0;
2177   b->ht_insert_ct = 0;
2178 
2179   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2180   b->ijonly = PETSC_FALSE;
2181 
2182   b->in_loc = NULL;
2183   b->v_loc  = NULL;
2184   b->n_loc  = 0;
2185 
2186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2187   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2188   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2189   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2190 #if defined(PETSC_HAVE_ELEMENTAL)
2191   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2192 #endif
2193 #if defined(PETSC_HAVE_SCALAPACK)
2194   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
2195 #endif
2196   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2197   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2198 
2199   B->symmetric                  = PETSC_TRUE;
2200   B->structurally_symmetric     = PETSC_TRUE;
2201   B->symmetric_set              = PETSC_TRUE;
2202   B->structurally_symmetric_set = PETSC_TRUE;
2203   B->symmetric_eternal          = PETSC_TRUE;
2204 #if defined(PETSC_USE_COMPLEX)
2205   B->hermitian                  = PETSC_FALSE;
2206   B->hermitian_set              = PETSC_FALSE;
2207 #else
2208   B->hermitian                  = PETSC_TRUE;
2209   B->hermitian_set              = PETSC_TRUE;
2210 #endif
2211 
2212   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2213   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2214   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2215   if (flg) {
2216     PetscReal fact = 1.39;
2217     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2218     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2219     if (fact <= 1.0) fact = 1.39;
2220     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2221     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2222   }
2223   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 /*MC
2228    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2229 
2230    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2231    and MATMPISBAIJ otherwise.
2232 
2233    Options Database Keys:
2234 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2235 
2236   Level: beginner
2237 
2238 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2239 M*/
2240 
2241 /*@C
2242    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2243    the user should preallocate the matrix storage by setting the parameters
2244    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2245    performance can be increased by more than a factor of 50.
2246 
2247    Collective on Mat
2248 
2249    Input Parameters:
2250 +  B - the matrix
2251 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2252           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2253 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2254            submatrix  (same for all local rows)
2255 .  d_nnz - array containing the number of block nonzeros in the various block rows
2256            in the upper triangular and diagonal part of the in diagonal portion of the local
2257            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2258            for the diagonal entry and set a value even if it is zero.
2259 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2260            submatrix (same for all local rows).
2261 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2262            off-diagonal portion of the local submatrix that is right of the diagonal
2263            (possibly different for each block row) or NULL.
2264 
2265 
2266    Options Database Keys:
2267 +   -mat_no_unroll - uses code that does not unroll the loops in the
2268                      block calculations (much slower)
2269 -   -mat_block_size - size of the blocks to use
2270 
2271    Notes:
2272 
2273    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2274    than it must be used on all processors that share the object for that argument.
2275 
2276    If the *_nnz parameter is given then the *_nz parameter is ignored
2277 
2278    Storage Information:
2279    For a square global matrix we define each processor's diagonal portion
2280    to be its local rows and the corresponding columns (a square submatrix);
2281    each processor's off-diagonal portion encompasses the remainder of the
2282    local matrix (a rectangular submatrix).
2283 
2284    The user can specify preallocated storage for the diagonal part of
2285    the local submatrix with either d_nz or d_nnz (not both).  Set
2286    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2287    memory allocation.  Likewise, specify preallocated storage for the
2288    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2289 
2290    You can call MatGetInfo() to get information on how effective the preallocation was;
2291    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2292    You can also run with the option -info and look for messages with the string
2293    malloc in them to see if additional memory allocation was needed.
2294 
2295    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2296    the figure below we depict these three local rows and all columns (0-11).
2297 
2298 .vb
2299            0 1 2 3 4 5 6 7 8 9 10 11
2300           --------------------------
2301    row 3  |. . . d d d o o o o  o  o
2302    row 4  |. . . d d d o o o o  o  o
2303    row 5  |. . . d d d o o o o  o  o
2304           --------------------------
2305 .ve
2306 
2307    Thus, any entries in the d locations are stored in the d (diagonal)
2308    submatrix, and any entries in the o locations are stored in the
2309    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2310    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2311 
2312    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2313    plus the diagonal part of the d matrix,
2314    and o_nz should indicate the number of block nonzeros per row in the o matrix
2315 
2316    In general, for PDE problems in which most nonzeros are near the diagonal,
2317    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2318    or you will get TERRIBLE performance; see the users' manual chapter on
2319    matrices.
2320 
2321    Level: intermediate
2322 
2323 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2324 @*/
2325 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2326 {
2327   PetscErrorCode ierr;
2328 
2329   PetscFunctionBegin;
2330   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2331   PetscValidType(B,1);
2332   PetscValidLogicalCollectiveInt(B,bs,2);
2333   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 /*@C
2338    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2339    (block compressed row).  For good matrix assembly performance
2340    the user should preallocate the matrix storage by setting the parameters
2341    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2342    performance can be increased by more than a factor of 50.
2343 
2344    Collective
2345 
2346    Input Parameters:
2347 +  comm - MPI communicator
2348 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2349           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2350 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2351            This value should be the same as the local size used in creating the
2352            y vector for the matrix-vector product y = Ax.
2353 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2354            This value should be the same as the local size used in creating the
2355            x vector for the matrix-vector product y = Ax.
2356 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2357 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2358 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2359            submatrix  (same for all local rows)
2360 .  d_nnz - array containing the number of block nonzeros in the various block rows
2361            in the upper triangular portion of the in diagonal portion of the local
2362            (possibly different for each block block row) or NULL.
2363            If you plan to factor the matrix you must leave room for the diagonal entry and
2364            set its value even if it is zero.
2365 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2366            submatrix (same for all local rows).
2367 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2368            off-diagonal portion of the local submatrix (possibly different for
2369            each block row) or NULL.
2370 
2371    Output Parameter:
2372 .  A - the matrix
2373 
2374    Options Database Keys:
2375 +   -mat_no_unroll - uses code that does not unroll the loops in the
2376                      block calculations (much slower)
2377 .   -mat_block_size - size of the blocks to use
2378 -   -mat_mpi - use the parallel matrix data structures even on one processor
2379                (defaults to using SeqBAIJ format on one processor)
2380 
2381    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2382    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2383    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2384 
2385    Notes:
2386    The number of rows and columns must be divisible by blocksize.
2387    This matrix type does not support complex Hermitian operation.
2388 
2389    The user MUST specify either the local or global matrix dimensions
2390    (possibly both).
2391 
2392    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2393    than it must be used on all processors that share the object for that argument.
2394 
2395    If the *_nnz parameter is given then the *_nz parameter is ignored
2396 
2397    Storage Information:
2398    For a square global matrix we define each processor's diagonal portion
2399    to be its local rows and the corresponding columns (a square submatrix);
2400    each processor's off-diagonal portion encompasses the remainder of the
2401    local matrix (a rectangular submatrix).
2402 
2403    The user can specify preallocated storage for the diagonal part of
2404    the local submatrix with either d_nz or d_nnz (not both).  Set
2405    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2406    memory allocation.  Likewise, specify preallocated storage for the
2407    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2408 
2409    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2410    the figure below we depict these three local rows and all columns (0-11).
2411 
2412 .vb
2413            0 1 2 3 4 5 6 7 8 9 10 11
2414           --------------------------
2415    row 3  |. . . d d d o o o o  o  o
2416    row 4  |. . . d d d o o o o  o  o
2417    row 5  |. . . d d d o o o o  o  o
2418           --------------------------
2419 .ve
2420 
2421    Thus, any entries in the d locations are stored in the d (diagonal)
2422    submatrix, and any entries in the o locations are stored in the
2423    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2424    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2425 
2426    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2427    plus the diagonal part of the d matrix,
2428    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2429    In general, for PDE problems in which most nonzeros are near the diagonal,
2430    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2431    or you will get TERRIBLE performance; see the users' manual chapter on
2432    matrices.
2433 
2434    Level: intermediate
2435 
2436 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2437 @*/
2438 
2439 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2440 {
2441   PetscErrorCode ierr;
2442   PetscMPIInt    size;
2443 
2444   PetscFunctionBegin;
2445   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2446   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2447   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2448   if (size > 1) {
2449     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2450     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2451   } else {
2452     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2453     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 
2459 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2460 {
2461   Mat            mat;
2462   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2463   PetscErrorCode ierr;
2464   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2465   PetscScalar    *array;
2466 
2467   PetscFunctionBegin;
2468   *newmat = NULL;
2469 
2470   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2471   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2472   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2473   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2474   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2475 
2476   mat->factortype   = matin->factortype;
2477   mat->preallocated = PETSC_TRUE;
2478   mat->assembled    = PETSC_TRUE;
2479   mat->insertmode   = NOT_SET_VALUES;
2480 
2481   a      = (Mat_MPISBAIJ*)mat->data;
2482   a->bs2 = oldmat->bs2;
2483   a->mbs = oldmat->mbs;
2484   a->nbs = oldmat->nbs;
2485   a->Mbs = oldmat->Mbs;
2486   a->Nbs = oldmat->Nbs;
2487 
2488   a->size         = oldmat->size;
2489   a->rank         = oldmat->rank;
2490   a->donotstash   = oldmat->donotstash;
2491   a->roworiented  = oldmat->roworiented;
2492   a->rowindices   = NULL;
2493   a->rowvalues    = NULL;
2494   a->getrowactive = PETSC_FALSE;
2495   a->barray       = NULL;
2496   a->rstartbs     = oldmat->rstartbs;
2497   a->rendbs       = oldmat->rendbs;
2498   a->cstartbs     = oldmat->cstartbs;
2499   a->cendbs       = oldmat->cendbs;
2500 
2501   /* hash table stuff */
2502   a->ht           = NULL;
2503   a->hd           = NULL;
2504   a->ht_size      = 0;
2505   a->ht_flag      = oldmat->ht_flag;
2506   a->ht_fact      = oldmat->ht_fact;
2507   a->ht_total_ct  = 0;
2508   a->ht_insert_ct = 0;
2509 
2510   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2511   if (oldmat->colmap) {
2512 #if defined(PETSC_USE_CTABLE)
2513     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2514 #else
2515     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2516     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2517     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2518 #endif
2519   } else a->colmap = NULL;
2520 
2521   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2522     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2523     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2524     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2525   } else a->garray = NULL;
2526 
2527   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2528   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2529   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2530   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2531   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2532 
2533   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2534   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2535   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2536   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2537 
2538   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2539   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2540   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2541   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2542   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2543   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2544   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2545   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2546   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2547   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2548   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2549   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2550   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2551 
2552   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2553   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2554   a->sMvctx = oldmat->sMvctx;
2555   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2556 
2557   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2558   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2559   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2560   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2561   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2562   *newmat = mat;
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2567 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2568 
2569 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2570 {
2571   PetscErrorCode ierr;
2572   PetscBool      isbinary;
2573 
2574   PetscFunctionBegin;
2575   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2576   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2577   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 /*XXXXX@
2582    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2583 
2584    Input Parameters:
2585 .  mat  - the matrix
2586 .  fact - factor
2587 
2588    Not Collective on Mat, each process can have a different hash factor
2589 
2590    Level: advanced
2591 
2592   Notes:
2593    This can also be set by the command line option: -mat_use_hash_table fact
2594 
2595 .seealso: MatSetOption()
2596 @XXXXX*/
2597 
2598 
2599 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2600 {
2601   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2602   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2603   PetscReal      atmp;
2604   PetscReal      *work,*svalues,*rvalues;
2605   PetscErrorCode ierr;
2606   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2607   PetscMPIInt    rank,size;
2608   PetscInt       *rowners_bs,dest,count,source;
2609   PetscScalar    *va;
2610   MatScalar      *ba;
2611   MPI_Status     stat;
2612 
2613   PetscFunctionBegin;
2614   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2615   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2616   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2617 
2618   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
2619   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
2620 
2621   bs  = A->rmap->bs;
2622   mbs = a->mbs;
2623   Mbs = a->Mbs;
2624   ba  = b->a;
2625   bi  = b->i;
2626   bj  = b->j;
2627 
2628   /* find ownerships */
2629   rowners_bs = A->rmap->range;
2630 
2631   /* each proc creates an array to be distributed */
2632   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2633 
2634   /* row_max for B */
2635   if (rank != size-1) {
2636     for (i=0; i<mbs; i++) {
2637       ncols = bi[1] - bi[0]; bi++;
2638       brow  = bs*i;
2639       for (j=0; j<ncols; j++) {
2640         bcol = bs*(*bj);
2641         for (kcol=0; kcol<bs; kcol++) {
2642           col  = bcol + kcol;                /* local col index */
2643           col += rowners_bs[rank+1];      /* global col index */
2644           for (krow=0; krow<bs; krow++) {
2645             atmp = PetscAbsScalar(*ba); ba++;
2646             row  = brow + krow;   /* local row index */
2647             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2648             if (work[col] < atmp) work[col] = atmp;
2649           }
2650         }
2651         bj++;
2652       }
2653     }
2654 
2655     /* send values to its owners */
2656     for (dest=rank+1; dest<size; dest++) {
2657       svalues = work + rowners_bs[dest];
2658       count   = rowners_bs[dest+1]-rowners_bs[dest];
2659       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2660     }
2661   }
2662 
2663   /* receive values */
2664   if (rank) {
2665     rvalues = work;
2666     count   = rowners_bs[rank+1]-rowners_bs[rank];
2667     for (source=0; source<rank; source++) {
2668       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRMPI(ierr);
2669       /* process values */
2670       for (i=0; i<count; i++) {
2671         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2672       }
2673     }
2674   }
2675 
2676   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2677   ierr = PetscFree(work);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2682 {
2683   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2684   PetscErrorCode    ierr;
2685   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2686   PetscScalar       *x,*ptr,*from;
2687   Vec               bb1;
2688   const PetscScalar *b;
2689 
2690   PetscFunctionBegin;
2691   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2692   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2693 
2694   if (flag == SOR_APPLY_UPPER) {
2695     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2696     PetscFunctionReturn(0);
2697   }
2698 
2699   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2700     if (flag & SOR_ZERO_INITIAL_GUESS) {
2701       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2702       its--;
2703     }
2704 
2705     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2706     while (its--) {
2707 
2708       /* lower triangular part: slvec0b = - B^T*xx */
2709       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2710 
2711       /* copy xx into slvec0a */
2712       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2713       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2714       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
2715       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2716 
2717       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2718 
2719       /* copy bb into slvec1a */
2720       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2721       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2722       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
2723       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2724 
2725       /* set slvec1b = 0 */
2726       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2727 
2728       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2729       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2730       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2731       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2732 
2733       /* upper triangular part: bb1 = bb1 - B*x */
2734       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2735 
2736       /* local diagonal sweep */
2737       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2738     }
2739     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2740   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2741     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2742   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2743     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2744   } else if (flag & SOR_EISENSTAT) {
2745     Vec               xx1;
2746     PetscBool         hasop;
2747     const PetscScalar *diag;
2748     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2749     PetscInt          i,n;
2750 
2751     if (!mat->xx1) {
2752       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2753       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2754     }
2755     xx1 = mat->xx1;
2756     bb1 = mat->bb1;
2757 
2758     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2759 
2760     if (!mat->diag) {
2761       /* this is wrong for same matrix with new nonzero values */
2762       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2763       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2764     }
2765     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2766 
2767     if (hasop) {
2768       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2769       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2770     } else {
2771       /*
2772           These two lines are replaced by code that may be a bit faster for a good compiler
2773       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2774       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2775       */
2776       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2777       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2778       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2779       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2780       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2781       if (omega == 1.0) {
2782         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2783         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2784       } else {
2785         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2786         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2787       }
2788       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2789       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2790       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2791       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2792     }
2793 
2794     /* multiply off-diagonal portion of matrix */
2795     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2796     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2797     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2798     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2799     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
2800     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2801     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2802     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2803     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2804     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2805 
2806     /* local sweep */
2807     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2808     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2809   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 /*@
2814      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2815          CSR format the local rows.
2816 
2817    Collective
2818 
2819    Input Parameters:
2820 +  comm - MPI communicator
2821 .  bs - the block size, only a block size of 1 is supported
2822 .  m - number of local rows (Cannot be PETSC_DECIDE)
2823 .  n - This value should be the same as the local size used in creating the
2824        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2825        calculated if N is given) For square matrices n is almost always m.
2826 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2827 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2828 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2829 .   j - column indices
2830 -   a - matrix values
2831 
2832    Output Parameter:
2833 .   mat - the matrix
2834 
2835    Level: intermediate
2836 
2837    Notes:
2838        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2839      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2840      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2841 
2842        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2843 
2844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2845           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2846 @*/
2847 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2848 {
2849   PetscErrorCode ierr;
2850 
2851 
2852   PetscFunctionBegin;
2853   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2854   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2855   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2856   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2857   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2858   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 
2863 /*@C
2864    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2865 
2866    Collective
2867 
2868    Input Parameters:
2869 +  B - the matrix
2870 .  bs - the block size
2871 .  i - the indices into j for the start of each local row (starts with zero)
2872 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2873 -  v - optional values in the matrix
2874 
2875    Level: advanced
2876 
2877    Notes:
2878    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2879    and usually the numerical values as well
2880 
2881    Any entries below the diagonal are ignored
2882 
2883 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2884 @*/
2885 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2886 {
2887   PetscErrorCode ierr;
2888 
2889   PetscFunctionBegin;
2890   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2895 {
2896   PetscErrorCode ierr;
2897   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2898   PetscInt       *indx;
2899   PetscScalar    *values;
2900 
2901   PetscFunctionBegin;
2902   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2903   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2904     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2905     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2906     PetscInt       *bindx,rmax=a->rmax,j;
2907     PetscMPIInt    rank,size;
2908 
2909     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2910     mbs = m/bs; Nbs = N/cbs;
2911     if (n == PETSC_DECIDE) {
2912       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2913     }
2914     nbs = n/cbs;
2915 
2916     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2917     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2918 
2919     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2920     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
2921     if (rank == size-1) {
2922       /* Check sum(nbs) = Nbs */
2923       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2924     }
2925 
2926     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2927     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2928     for (i=0; i<mbs; i++) {
2929       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2930       nnz  = nnz/bs;
2931       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2932       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2933       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2934     }
2935     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2936     ierr = PetscFree(bindx);CHKERRQ(ierr);
2937 
2938     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2939     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2940     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2941     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
2942     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
2943     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2944     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2945   }
2946 
2947   /* numeric phase */
2948   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2949   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2950 
2951   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2952   for (i=0; i<m; i++) {
2953     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2954     Ii   = i + rstart;
2955     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2956     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2957   }
2958   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2959   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963