xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision c677e75f96b8a8bfc4276d1fe38db417c933afaf)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
/*
   MatPreallocateWithMats_Private - Preallocates B using the nonzero patterns of
   the nm matrices X[].  Every X[i] must have the same row layout as B (verified
   with PetscLayoutCompare()).  When symm[i] is set, the transposed locations
   (col,row) of X[i] are inserted as well, so that B can hold the symmetric
   completion of a matrix stored in symmetric (upper-triangular) format.

   Works by building a temporary MATPREALLOCATOR matrix with B's sizes and block
   size, "setting" the pattern into it row by row, and then transferring the
   resulting preallocation to B via MatPreallocatorPreallocate() (fill is
   forwarded to that call).
*/
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
25   PetscValidLogicalCollectiveInt(B,nm,2);
26   for (i = 0; i < nm; i++) {
27     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
28     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
29     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
30   }
31   PetscValidLogicalCollectiveBool(B,fill,5);
32   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
33   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
34   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
35   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
36   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
37   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
38   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
39   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
40   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
41   for (r = rstart; r < rend; ++r) {
42     PetscInt          ncols;
43     const PetscInt    *row;
44     const PetscScalar *vals;
45 
46     for (i = 0; i < nm; i++) {
47       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
48       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
       /* also record the transposed (col,row) entries for symmetric-storage input */
49       if (symm && symm[i]) {
50         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
51       }
52       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
53     }
54   }
55   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
56   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
57   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
58   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
/*
   MatConvert_MPISBAIJ_Basic - Generic conversion from MPISBAIJ (symmetric
   storage: only the upper triangle is kept) to another matrix type by
   explicitly inserting both triangles into the target matrix B.

   reuse semantics:
   - MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX: create B with A's sizes/block
     size and, unless B is dense, preallocate it from A's pattern including the
     symmetric completion (via MatPreallocateWithMats_Private with symm=TRUE).
   - MAT_REUSE_MATRIX: zero *newmat and refill it.

   For each owned row the upper-triangular entries are inserted at (r,col) and
   mirrored at (col,r); in complex builds, a Hermitian A mirrors the conjugated
   values instead.  MAT_INPLACE_MATRIX replaces A's header with B's.
*/
62 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
63 {
64   Mat            B;
65   PetscErrorCode ierr;
66   PetscInt       r;
67 
68   PetscFunctionBegin;
69   if (reuse != MAT_REUSE_MATRIX) {
70     PetscBool symm = PETSC_TRUE,isdense;
71     PetscInt  bs;
72 
73     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
74     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
75     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
76     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
77     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
78     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
79     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
     /* dense targets need no preallocation; everything else is preallocated from A's symmetric pattern */
80     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
81     if (!isdense) {
82       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
84       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
85     } else {
86       ierr = MatSetUp(B);CHKERRQ(ierr);
87     }
88   } else {
89     B    = *newmat;
90     ierr = MatZeroEntries(B);CHKERRQ(ierr);
91   }
92 
  /* MatGetRow on SBAIJ requires the upper-triangular access mode to be enabled */
93   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
94   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
95     PetscInt          ncols;
96     const PetscInt    *row;
97     const PetscScalar *vals;
98 
99     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
100     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
101 #if defined(PETSC_USE_COMPLEX)
    /* mirror into the lower triangle; Hermitian matrices get conjugated values */
102     if (A->hermitian) {
103       PetscInt i;
104       for (i = 0; i < ncols; i++) {
105         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
106       }
107     } else {
108       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109     }
110 #else
111     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
112 #endif
113     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
114   }
115   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
116   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
117   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
118 
119   if (reuse == MAT_INPLACE_MATRIX) {
120     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
121   } else {
122     *newmat = B;
123   }
124   PetscFunctionReturn(0);
125 }
126 
/* Saves the current numerical values of both local parts (diagonal block A and
   off-diagonal block B) so they can later be restored with
   MatRetrieveValues_MPISBAIJ(); delegates to MatStoreValues() on each part. */
127 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
128 {
129   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
/* Restores the numerical values previously saved by MatStoreValues_MPISBAIJ();
   delegates to MatRetrieveValues() on the diagonal (A) and off-diagonal (B)
   local parts. */
138 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
/*
   MatSetValues_SeqSBAIJ_A_Private - Inserts/adds one scalar 'value' at local
   point position (row,col) into the diagonal (SeqSBAIJ) part A.  Expects the
   caller (MatSetValues_MPISBAIJ) to have the working variables bs, bs2, aa,
   ai, aj, aimax, ailen, rp, ap, bap, brow, bcol, ridx, cidx, low, high, t,
   _i, N, nrow, rmax, a, A and ierr in scope.

   Locates block column 'bcol' within block row 'brow' using a short binary
   search followed by a linear scan; on a hit it adds or overwrites the scalar
   at offset (bs*cidx + ridx) inside the bs2-sized block.  On a miss the
   behavior depends on a->nonew: 1 silently drops the entry, -1 raises an
   error (orow/ocol are the original global indices, used only for the
   message), otherwise the row is grown (MatSeqXAIJReallocateAIJ), later
   entries are shifted up, a zeroed block is created, and the matrix
   nonzerostate is bumped.
*/
149 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
150   { \
151     brow = row/bs;  \
152     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
153     rmax = aimax[brow]; nrow = ailen[brow]; \
154     bcol = col/bs; \
155     ridx = row % bs; cidx = col % bs; \
156     low  = 0; high = nrow; \
157     while (high-low > 3) { \
158       t = (low+high)/2; \
159       if (rp[t] > bcol) high = t; \
160       else              low  = t; \
161     } \
162     for (_i=low; _i<high; _i++) { \
163       if (rp[_i] > bcol) break; \
164       if (rp[_i] == bcol) { \
165         bap = ap + bs2*_i + bs*cidx + ridx; \
166         if (addv == ADD_VALUES) *bap += value;  \
167         else                    *bap  = value;  \
168         goto a_noinsert; \
169       } \
170     } \
171     if (a->nonew == 1) goto a_noinsert; \
172     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
173     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
174     N = nrow++ - 1;  \
175     /* shift up all the later entries in this row */ \
176     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
177     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
178     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
179     rp[_i]                      = bcol;  \
180     ap[bs2*_i + bs*cidx + ridx] = value;  \
181     A->nonzerostate++;\
182 a_noinsert:; \
183     ailen[brow] = nrow; \
184   }
185 
/*
   MatSetValues_SeqSBAIJ_B_Private - Same insertion logic as
   MatSetValues_SeqSBAIJ_A_Private, but targeting the off-diagonal (SeqBAIJ)
   part B; reads/updates the B-side working variables (ba, bi, bj, bimax,
   bilen, b, B) that MatSetValues_MPISBAIJ keeps in scope, and uses its own
   label (b_noinsert) so both macros can be expanded in the same function.
*/
186 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
187   { \
188     brow = row/bs;  \
189     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
190     rmax = bimax[brow]; nrow = bilen[brow]; \
191     bcol = col/bs; \
192     ridx = row % bs; cidx = col % bs; \
193     low  = 0; high = nrow; \
194     while (high-low > 3) { \
195       t = (low+high)/2; \
196       if (rp[t] > bcol) high = t; \
197       else              low  = t; \
198     } \
199     for (_i=low; _i<high; _i++) { \
200       if (rp[_i] > bcol) break; \
201       if (rp[_i] == bcol) { \
202         bap = ap + bs2*_i + bs*cidx + ridx; \
203         if (addv == ADD_VALUES) *bap += value;  \
204         else                    *bap  = value;  \
205         goto b_noinsert; \
206       } \
207     } \
208     if (b->nonew == 1) goto b_noinsert; \
209     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
210     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
211     N = nrow++ - 1;  \
212     /* shift up all the later entries in this row */ \
213     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
214     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
215     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
216     rp[_i]                      = bcol;  \
217     ap[bs2*_i + bs*cidx + ridx] = value;  \
218     B->nonzerostate++;\
219 b_noinsert:; \
220     bilen[brow] = nrow; \
221   }
222 
223 /* Only add/insert a(i,j) with i<=j (blocks).
224    Any a(i,j) with i>j input by the user is ignored (when
225    a->ignore_ltriangular is set) or generates an error.
226 
227    Locally owned entries go directly into the diagonal (A) or off-diagonal (B)
228    sequential part via the MatSetValues_SeqSBAIJ_{A,B}_Private macros above;
229    off-process rows are stashed for communication at assembly time.
*/
226 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
227 {
228   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
229   MatScalar      value;
230   PetscBool      roworiented = baij->roworiented;
231   PetscErrorCode ierr;
232   PetscInt       i,j,row,col;
233   PetscInt       rstart_orig=mat->rmap->rstart;
234   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
235   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
236 
237   /* Some Variables required in the macro */
238   Mat          A     = baij->A;
239   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
240   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
241   MatScalar    *aa   =a->a;
242 
243   Mat         B     = baij->B;
244   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
245   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
246   MatScalar   *ba   =b->a;
247 
248   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
249   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
250   MatScalar *ap,*bap;
251 
252   /* for stash */
253   PetscInt  n_loc, *in_loc = NULL;
254   MatScalar *v_loc = NULL;
255 
256   PetscFunctionBegin;
  /* grow the cached scratch buffers used to filter lower-triangular entries before stashing */
257   if (!baij->donotstash) {
258     if (n > baij->n_loc) {
259       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
260       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
261       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
262       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
263 
264       baij->n_loc = n;
265     }
266     in_loc = baij->in_loc;
267     v_loc  = baij->v_loc;
268   }
269 
270   for (i=0; i<m; i++) {
271     if (im[i] < 0) continue;
272     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
273     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
274       row = im[i] - rstart_orig;              /* local row index */
275       for (j=0; j<n; j++) {
        /* block-lower-triangular input: drop or error depending on the option */
276         if (im[i]/bs > in[j]/bs) {
277           if (a->ignore_ltriangular) {
278             continue;    /* ignore lower triangular blocks */
279           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
280         }
281         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
282           col  = in[j] - cstart_orig;         /* local col index */
283           brow = row/bs; bcol = col/bs;
284           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
285           if (roworiented) value = v[i*n+j];
286           else             value = v[i+j*m];
287           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
288           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
289         } else if (in[j] < 0) continue;
290         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291         else {  /* off-diag entry (B) */
          /* after first assembly, global columns of B are compacted: translate through colmap */
292           if (mat->was_assembled) {
293             if (!baij->colmap) {
294               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
295             }
296 #if defined(PETSC_USE_CTABLE)
297             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
298             col  = col - 1;
299 #else
300             col = baij->colmap[in[j]/bs] - 1;
301 #endif
            /* col < 0: block column not in the current pattern; if new nonzeros are
               allowed, disassemble back to global numbering and refresh the B-side
               pointers the insertion macro below depends on */
302             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
303               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
304               col  =  in[j];
305               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
306               B    = baij->B;
307               b    = (Mat_SeqBAIJ*)(B)->data;
308               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
309               ba   = b->a;
310             } else col += in[j]%bs;
311           } else col = in[j];
312           if (roworiented) value = v[i*n+j];
313           else             value = v[i+j*m];
314           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
315           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
316         }
317       }
318     } else {  /* off processor entry */
319       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
320       if (!baij->donotstash) {
321         mat->assembled = PETSC_FALSE;
322         n_loc          = 0;
323         for (j=0; j<n; j++) {
324           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
325           in_loc[n_loc] = in[j];
326           if (roworiented) {
327             v_loc[n_loc] = v[i*n+j];
328           } else {
329             v_loc[n_loc] = v[j*m+i];
330           }
331           n_loc++;
332         }
333         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
334       }
335     }
336   }
337   PetscFunctionReturn(0);
338 }
339 
/*
   MatSetValuesBlocked_SeqSBAIJ_Inlined - Inserts/adds one bs x bs block of
   values v at block position (row,col) of a SeqSBAIJ matrix.  Block-lower-
   triangular input (col < row) is dropped when a->ignore_ltriangular is set,
   otherwise it is an error.  orow/ocol are the original global block indices,
   used only in error messages.

   v is read row-oriented or column-oriented according to a->roworiented;
   blocks are stored column-major, hence the strided (jj += bs) copy in the
   row-oriented case.  The target block is found with a binary search plus a
   linear scan; a missing block is handled per a->nonew (1: drop, -1: error,
   else grow the row via MatSeqXAIJReallocateAIJ and shift later entries up).
*/
340 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
341 {
342   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
343   PetscErrorCode    ierr;
344   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
345   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
346   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
347   PetscBool         roworiented=a->roworiented;
348   const PetscScalar *value     = v;
349   MatScalar         *ap,*aa = a->a,*bap;
350 
351   PetscFunctionBegin;
352   if (col < row) {
353     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
354     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
355   }
356   rp   = aj + ai[row];
357   ap   = aa + bs2*ai[row];
358   rmax = imax[row];
359   nrow = ailen[row];
360   value = v;
361   low   = 0;
362   high  = nrow;
363 
  /* binary search narrows [low,high) to at most 7 candidates, then scan linearly */
364   while (high-low > 7) {
365     t = (low+high)/2;
366     if (rp[t] > col) high = t;
367     else             low  = t;
368   }
369   for (i=low; i<high; i++) {
370     if (rp[i] > col) break;
371     if (rp[i] == col) {
372       bap = ap +  bs2*i;
373       if (roworiented) {
374         if (is == ADD_VALUES) {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] += *value++;
378             }
379           }
380         } else {
381           for (ii=0; ii<bs; ii++) {
382             for (jj=ii; jj<bs2; jj+=bs) {
383               bap[jj] = *value++;
384             }
385           }
386         }
387       } else {
388         if (is == ADD_VALUES) {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++ += *value++;
392             }
393           }
394         } else {
395           for (ii=0; ii<bs; ii++) {
396             for (jj=0; jj<bs; jj++) {
397               *bap++  = *value++;
398             }
399           }
400         }
401       }
402       goto noinsert2;
403     }
404   }
405   if (nonew == 1) goto noinsert2;
406   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
407   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
408   N = nrow++ - 1; high++;
409   /* shift up all the later entries in this row */
410   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
411   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
412   rp[i] = col;
413   bap   = ap +  bs2*i;
414   if (roworiented) {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=ii; jj<bs2; jj+=bs) {
417         bap[jj] = *value++;
418       }
419     }
420   } else {
421     for (ii=0; ii<bs; ii++) {
422       for (jj=0; jj<bs; jj++) {
423         *bap++ = *value++;
424       }
425     }
426   }
427   noinsert2:;
428   ailen[row] = nrow;
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433    This routine is exactly duplicated in mpibaij.c -- keep the two copies in sync.
434 
435    Inserts/adds one bs x bs block of values v at block position (row,col) of a
436    SeqBAIJ matrix (no triangular restriction here, unlike the SBAIJ variant
437    above).  Same search/insert strategy: binary search then linear scan; a
438    missing block is dropped (nonew==1), an error (nonew==-1), or inserted after
439    growing the row with MatSeqXAIJReallocateAIJ.  v is interpreted per
440    a->roworiented; stored blocks are column-major.
441 */
435 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
436 {
437   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
438   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
439   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
440   PetscErrorCode    ierr;
441   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
442   PetscBool         roworiented=a->roworiented;
443   const PetscScalar *value     = v;
444   MatScalar         *ap,*aa = a->a,*bap;
445 
446   PetscFunctionBegin;
447   rp   = aj + ai[row];
448   ap   = aa + bs2*ai[row];
449   rmax = imax[row];
450   nrow = ailen[row];
451   low  = 0;
452   high = nrow;
453   value = v;
454   while (high-low > 7) {
455     t = (low+high)/2;
456     if (rp[t] > col) high = t;
457     else             low  = t;
458   }
459   for (i=low; i<high; i++) {
460     if (rp[i] > col) break;
461     if (rp[i] == col) {
462       bap = ap +  bs2*i;
463       if (roworiented) {
464         if (is == ADD_VALUES) {
465           for (ii=0; ii<bs; ii++) {
466             for (jj=ii; jj<bs2; jj+=bs) {
467               bap[jj] += *value++;
468             }
469           }
470         } else {
471           for (ii=0; ii<bs; ii++) {
472             for (jj=ii; jj<bs2; jj+=bs) {
473               bap[jj] = *value++;
474             }
475           }
476         }
477       } else {
478         if (is == ADD_VALUES) {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj] += value[jj];
482             }
483             bap += bs;
484           }
485         } else {
486           for (ii=0; ii<bs; ii++,value+=bs) {
487             for (jj=0; jj<bs; jj++) {
488               bap[jj]  = value[jj];
489             }
490             bap += bs;
491           }
492         }
493       }
494       goto noinsert2;
495     }
496   }
497   if (nonew == 1) goto noinsert2;
498   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
499   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
500   N = nrow++ - 1; high++;
501   /* shift up all the later entries in this row */
502   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
503   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
504   rp[i] = col;
505   bap   = ap +  bs2*i;
506   if (roworiented) {
507     for (ii=0; ii<bs; ii++) {
508       for (jj=ii; jj<bs2; jj+=bs) {
509         bap[jj] = *value++;
510       }
511     }
512   } else {
513     for (ii=0; ii<bs; ii++) {
514       for (jj=0; jj<bs; jj++) {
515         *bap++ = *value++;
516       }
517     }
518   }
519   noinsert2:;
520   ailen[row] = nrow;
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This routine could be optimized by removing the need for the block copy below and passing stride information
526   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
527 
528   Inserts/adds the m x n array of bs x bs blocks v at block positions
529   (im[i],in[j]).  Block-lower-triangular input (im[i] > in[j]) is dropped or
530   errors depending on ignore_ltriangular.  Each block is copied (or aliased,
531   when no copy is needed) into the bs2-sized scratch buffer baij->barray and
532   dispatched to the diagonal (A) or off-diagonal (B) inlined routine; rows
533   owned by other processes are placed in the block stash.
534 */
528 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
529 {
530   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
531   const MatScalar *value;
532   MatScalar       *barray     =baij->barray;
533   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
534   PetscErrorCode  ierr;
535   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
536   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
537   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
538 
539   PetscFunctionBegin;
  /* lazily allocate the bs2 scratch block used for re-packing v */
540   if (!barray) {
541     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
542     baij->barray = barray;
543   }
544 
  /* distance (in scalars) between consecutive rows of one block inside v */
545   if (roworiented) {
546     stepval = (n-1)*bs;
547   } else {
548     stepval = (m-1)*bs;
549   }
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
553     if (im[i] >= rstart && im[i] < rend) {
554       row = im[i] - rstart;
555       for (j=0; j<n; j++) {
556         if (im[i] > in[j]) {
557           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
558           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
559         }
560         /* If NumCol = 1 then a copy is not required */
561         if ((roworiented) && (n == 1)) {
562           barray = (MatScalar*) v + i*bs2;
563         } else if ((!roworiented) && (m == 1)) {
564           barray = (MatScalar*) v + j*bs2;
565         } else { /* Here a copy is required */
566           if (roworiented) {
567             value = v + i*(stepval+bs)*bs + j*bs;
568           } else {
569             value = v + j*(stepval+bs)*bs + i*bs;
570           }
571           for (ii=0; ii<bs; ii++,value+=stepval) {
572             for (jj=0; jj<bs; jj++) {
573               *barray++ = *value++;
574             }
575           }
          /* rewind so barray again points at the start of the scratch block */
576           barray -=bs2;
577         }
578 
579         if (in[j] >= cstart && in[j] < cend) {
580           col  = in[j] - cstart;
581           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
582         } else if (in[j] < 0) continue;
583         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584         else {
585           if (mat->was_assembled) {
586             if (!baij->colmap) {
587               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 
590 #if defined(PETSC_USE_DEBUG)
591 #if defined(PETSC_USE_CTABLE)
592             { PetscInt data;
593               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
594               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
595             }
596 #else
597             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
598 #endif
599 #endif
600 #if defined(PETSC_USE_CTABLE)
601             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
602             col  = (col - 1)/bs;
603 #else
604             col = (baij->colmap[in[j]] - 1)/bs;
605 #endif
            /* unknown block column: disassemble back to global numbering if new nonzeros are allowed */
606             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
607               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
608               col  = in[j];
609             }
610           } else col = in[j];
611           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!baij->donotstash) {
617         if (roworiented) {
618           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
/*
   MatGetValues_MPISBAIJ - Retrieves the m x n entries at point positions
   (idxm[i],idxn[j]) into v (row-major).  Only locally owned rows are
   supported; negative indices are skipped.  Locally owned columns are read
   from the diagonal part A; other columns are translated through baij->colmap
   to the compacted column numbering of the off-diagonal part B, returning 0.0
   for positions outside B's pattern.
*/
628 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
          /* block column absent from B's pattern (or stale colmap entry): the value is a structural zero */
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
/*
   MatNorm_MPISBAIJ - Computes a norm of a parallel SBAIJ matrix.  Supports
   NORM_FROBENIUS and NORM_1/NORM_INFINITY (the matrix is symmetric, so max
   row sum equals max column sum).  For Frobenius the off-diagonal part's
   contribution is counted twice (sum[0] + 2*sum[1]) because B stores each
   off-process coupling only once; for 1/inf every entry contributes to both
   its row and column sum, except that entries of A's diagonal block are
   counted once per side (the bcol > 0 test skips double-counting the
   diagonal block itself).
*/
669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
670 {
671   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
672   PetscErrorCode ierr;
673   PetscReal      sum[2],*lnorm2;
674 
675   PetscFunctionBegin;
676   if (baij->size == 1) {
677     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
678   } else {
679     if (type == NORM_FROBENIUS) {
680       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
681       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
682       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
683       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
684       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
685       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
686       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
687       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
688     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
689       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
690       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
691       PetscReal    *rsum,*rsum2,vabs;
692       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
693       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
694       MatScalar    *v;
695 
696       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
697       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
698       /* Amat: accumulate |a_ij| into the global column sums rsum[] */
699       v = amat->a; jj = amat->j;
700       for (brow=0; brow<mbs; brow++) {
701         grow = bs*(rstart + brow);
702         nz   = amat->i[brow+1] - amat->i[brow];
703         for (bcol=0; bcol<nz; bcol++) {
704           gcol = bs*(rstart + *jj); jj++;
705           for (col=0; col<bs; col++) {
706             for (row=0; row<bs; row++) {
707               vabs            = PetscAbsScalar(*v); v++;
708               rsum[gcol+col] += vabs;
709               /* non-diagonal block */
710               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
711             }
712           }
713         }
714         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
715       }
716       /* Bmat: each stored entry contributes to both its row and its column sum */
717       v = bmat->a; jj = bmat->j;
718       for (brow=0; brow<mbs; brow++) {
719         grow = bs*(rstart + brow);
720         nz = bmat->i[brow+1] - bmat->i[brow];
721         for (bcol=0; bcol<nz; bcol++) {
722           gcol = bs*garray[*jj]; jj++;
723           for (col=0; col<bs; col++) {
724             for (row=0; row<bs; row++) {
725               vabs            = PetscAbsScalar(*v); v++;
726               rsum[gcol+col] += vabs;
727               rsum[grow+row] += vabs;
728             }
729           }
730         }
731         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
732       }
      /* combine the partial column sums across ranks and take the maximum */
733       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734       *norm = 0.0;
735       for (col=0; col<mat->cmap->N; col++) {
736         if (rsum2[col] > *norm) *norm = rsum2[col];
737       }
738       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
739     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
740   }
741   PetscFunctionReturn(0);
742 }
743 
/*
   MatAssemblyBegin_MPISBAIJ - Starts communication of the stashed off-process
   entries: the scalar stash (mat->stash) holds point entries partitioned by
   mat->rmap->range, the block stash (mat->bstash) holds bs x bs blocks
   partitioned by baij->rangebs.  No-op when stashing is disabled or the user
   promised no off-process entries.
*/
744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       nstash,reallocs;
749 
750   PetscFunctionBegin;
751   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752 
753   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  /* bug fix: this previously queried &mat->stash again, so the "Block-Stash"
     message reported the scalar stash statistics instead of the block stash's */
757   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
758   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
/*
   MatAssemblyEnd_MPISBAIJ - completes assembly of an MPISBAIJ matrix.

   Drains the point-entry stash and then the block stash (values received from
   other processes) and inserts them locally, assembles the diagonal block A
   and the off-diagonal block B, handles collective disassembly when the
   nonzero structure may have changed, and on the first final assembly builds
   the scatters needed by MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point-entry stash: each received message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;  /* flg == 0 means no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    /* drain the block stash: values arrive in bs2-sized column-oriented blocks */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* note the val offset is i*bs2 since each entry is a full bs-by-bs block */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    /* restore the row/column orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD over was_assembled: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* release the scratch space used by MatGetRow, it will be re-allocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
861 
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - views an MPISBAIJ matrix through an
   ASCII, draw, or socket viewer.

   For the ASCII INFO/INFO_DETAIL/FACTOR_INFO formats it prints summary data
   and returns early. Otherwise it gathers the whole matrix onto rank 0 (by
   re-inserting the local A and B blocks into a temporary parallel matrix whose
   rows all live on rank 0) and views that sequential copy. All ranks must
   participate because draw viewers synchronize collectively.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local sizes and nonzero counts of the A and B blocks */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factored-info format */
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    /* rank 0 owns all M rows of the temporary matrix; everyone else owns none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs global row indices covered by block-row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs-by-bs block at a time */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps compressed local block columns back to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* only rank 0 holds data; view its local diagonal block, which is now the whole matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
983 
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986 
987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989   PetscErrorCode ierr;
990   PetscBool      iascii,isdraw,issocket,isbinary;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997   if (iascii || isdraw || issocket) {
998     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999   } else if (isbinary) {
1000     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
/*
   MatDestroy_MPISBAIJ - destructor for an MPISBAIJ matrix.

   Frees the stashes, the sequential blocks A and B, the column map, the
   communication vectors/scatters, all scratch arrays, and finally the data
   struct itself; then detaches the composed function pointers so the object
   can be safely retyped.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes used for off-process MatSetValues */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* the two local sequential blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* vectors and scatters supporting MatMult and relatives */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* scratch space for MatGetRow and setvalues */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* remove the type name and composed methods so the Mat shell is type-free */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1061 
/*
   MatMult_MPISBAIJ_Hermitian - yy = A*xx for a Hermitian MPISBAIJ matrix.

   Only the upper triangle is stored, so the product is built from three
   pieces: the local diagonal block A, the off-diagonal block B applied to the
   scattered ghost values, and the Hermitian transpose of B which supplies the
   mirrored (lower-triangular) contribution to remote rows.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: B^H * x, to be shipped to the owners of those rows */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  /* slvec0 = [x ; B^H x]; only the leading bs*mbs entries need the copy */
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  /* ADD_VALUES accumulates the mirrored contributions into slvec1 */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*(ghost x) + (A*x + received contributions) */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1092 
/*
   MatMult_MPISBAIJ - yy = A*xx for a symmetric MPISBAIJ matrix.

   Identical structure to the Hermitian variant but uses the plain transpose
   of the off-diagonal block B to generate the mirrored lower-triangular
   contribution.
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: B^T * x, destined for the owners of those rows */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  /* slvec0 = [x ; B^T x]; only the leading bs*mbs entries need the copy */
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  /* ADD_VALUES accumulates remote contributions into slvec1 */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*(ghost x) + (A*x + received contributions) */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1123 
/*
   MatMultAdd_MPISBAIJ_Hermitian - zz = yy + A*xx for a Hermitian MPISBAIJ
   matrix; same communication pattern as MatMult_MPISBAIJ_Hermitian with yy
   folded into the diagonal-block product.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A*xx + yy */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part: B^H * x for remote rows */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* xx array is restored between scatter begin/end; the scatter reads slvec0 only */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*(ghost x) + (A*x + yy + received contributions) */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1154 
/*
   MatMultAdd_MPISBAIJ - zz = yy + A*xx for a symmetric MPISBAIJ matrix; same
   communication pattern as MatMult_MPISBAIJ with yy folded into the
   diagonal-block product.
*/
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A*xx + yy */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part: B^T * x for remote rows */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* xx array is restored between scatter begin/end; the scatter reads slvec0 only */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*(ghost x) + (A*x + yy + received contributions) */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1185 
1186 /*
1187   This only works correctly for square matrices where the subblock A->A is the
1188    diagonal block
1189 */
1190 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1197   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1202 {
1203   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1208   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
/*
   MatGetRow_MPISBAIJ - returns one locally owned row of the matrix, merging
   the entries coming from the diagonal block A and off-diagonal block B into
   a single run sorted by increasing global column number.

   Scratch arrays rowvalues/rowindices are allocated lazily on the first call
   (sized by the longest local row) and freed at assembly time.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* only request the pieces (values/columns) the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps compressed local block columns of B to global block columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested; find imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1290 
1291 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1292 {
1293   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1294 
1295   PetscFunctionBegin;
1296   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1297   baij->getrowactive = PETSC_FALSE;
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1302 {
1303   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1304   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1305 
1306   PetscFunctionBegin;
1307   aA->getrow_utriangular = PETSC_TRUE;
1308   PetscFunctionReturn(0);
1309 }
1310 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1311 {
1312   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1313   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1314 
1315   PetscFunctionBegin;
1316   aA->getrow_utriangular = PETSC_FALSE;
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1321 {
1322   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1327   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1332 {
1333   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1338   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1343    Input: isrow       - distributed(parallel),
1344           iscol_local - locally owned (seq)
1345 */
1346 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1347 {
1348   PetscErrorCode ierr;
1349   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1350   const PetscInt *ptr1,*ptr2;
1351 
1352   PetscFunctionBegin;
1353   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1354   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1355   if (sz1 > sz2) {
1356     *flg = PETSC_FALSE;
1357     PetscFunctionReturn(0);
1358   }
1359 
1360   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1361   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1362 
1363   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1364   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1365   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1366   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1367   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1368   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1369 
1370   nmatch=0;
1371   k     = 0;
1372   for (i=0; i<sz1; i++){
1373     for (j=k; j<sz2; j++){
1374       if (a1[i] == a2[j]) {
1375         k = j; nmatch++;
1376         break;
1377       }
1378     }
1379   }
1380   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1381   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1382   ierr = PetscFree(a1);CHKERRQ(ierr);
1383   ierr = PetscFree(a2);CHKERRQ(ierr);
1384   if (nmatch < sz1) {
1385     *flg = PETSC_FALSE;
1386   } else {
1387     *flg = PETSC_TRUE;
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
/*
   MatCreateSubMatrix_MPISBAIJ - extracts a parallel submatrix.

   For the symmetric format the column set must equal the row set, so the
   columns are gathered to every process and checked against isrow; the actual
   extraction is delegated to the BAIJ implementation. The gathered IS is
   composed on the new matrix so a MAT_REUSE_MATRIX call can retrieve it.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* reuse the gathered column IS stored on the matrix during the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* compose takes a reference, so dropping ours ties the IS lifetime to newmat */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1418 
1419 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1420 {
1421   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1426   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1431 {
1432   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1433   Mat            A  = a->A,B = a->B;
1434   PetscErrorCode ierr;
1435   PetscLogDouble isend[5],irecv[5];
1436 
1437   PetscFunctionBegin;
1438   info->block_size = (PetscReal)matin->rmap->bs;
1439 
1440   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1441 
1442   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1443   isend[3] = info->memory;  isend[4] = info->mallocs;
1444 
1445   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1446 
1447   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1448   isend[3] += info->memory;  isend[4] += info->mallocs;
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(0);
1476 }
1477 
/*
   MatSetOption_MPISBAIJ - sets a matrix option, forwarding to the sequential
   blocks where appropriate and, in the complex case, swapping the mat-vec
   function pointers between the symmetric and Hermitian kernels.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* structural options are forwarded to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      /* a Hermitian matrix is not symmetric in the complex case; transpose ops are invalid */
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      /* for a symmetric matrix the transpose product equals the product itself */
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* the SBAIJ format cannot represent a non-symmetric matrix */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1561 
1562 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1563 {
1564   PetscErrorCode ierr;
1565 
1566   PetscFunctionBegin;
1567   if (reuse == MAT_INITIAL_MATRIX) {
1568     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1569   }  else if (reuse == MAT_REUSE_MATRIX) {
1570     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1571   }
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1576 {
1577   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1578   Mat            a     = baij->A, b=baij->B;
1579   PetscErrorCode ierr;
1580   PetscInt       nv,m,n;
1581   PetscBool      flg;
1582 
1583   PetscFunctionBegin;
1584   if (ll != rr) {
1585     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1586     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1587   }
1588   if (!ll) PetscFunctionReturn(0);
1589 
1590   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1591   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1592 
1593   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1594   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1595 
1596   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597 
1598   /* left diagonalscale the off-diagonal part */
1599   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1600 
1601   /* scale the diagonal part */
1602   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1603 
1604   /* right diagonalscale the off-diagonal part */
1605   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1611 {
1612   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614 
1615   PetscFunctionBegin;
1616   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1621 
1622 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1623 {
1624   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1625   Mat            a,b,c,d;
1626   PetscBool      flg;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   a = matA->A; b = matA->B;
1631   c = matB->A; d = matB->B;
1632 
1633   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1634   if (flg) {
1635     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1636   }
1637   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1642 {
1643   PetscErrorCode ierr;
1644   PetscBool      isbaij;
1645 
1646   PetscFunctionBegin;
1647   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1648   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1649   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1650   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1651     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1652     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1653     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1654   } else {
1655     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1656     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1657 
1658     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1659     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1660   }
1661   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1666 {
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
/*
   MatAXPY_MPISBAIJ - Computes Y = a*X + Y.

   Three paths depending on str:
   - SAME_NONZERO_PATTERN: the nonzero arrays line up one-to-one, so a raw
     BLAS axpy on the value arrays of both local blocks suffices.
   - SUBSET_NONZERO_PATTERN: delegate to the generic row-based MatAXPY_Basic,
     temporarily enabling upper-triangular row access on X.
   - DIFFERENT_NONZERO_PATTERN: build a new matrix B preallocated for the
     union pattern, add into it, and replace Y's internals with B's.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal (SeqSBAIJ) blocks: ya->a += alpha * xa->a over all stored nonzeros */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    /* off-diagonal (SeqBAIJ) blocks, same treatment */
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    /* per-block-row counts for the union of X's and Y's patterns */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B's guts become Y's; Y's old internals are destroyed, B's header freed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1723 
1724 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1725 {
1726   PetscErrorCode ierr;
1727   PetscInt       i;
1728   PetscBool      flg;
1729 
1730   PetscFunctionBegin;
1731   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1732   for (i=0; i<n; i++) {
1733     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1734     if (!flg) {
1735       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1736     }
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
/*
   MatShift_MPISBAIJ - Computes Y = Y + a*I.

   Ensures the diagonal block has storage for diagonal entries before
   delegating to the generic MatShift_Basic().
*/
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one block per block row (the diagonal block entry) */
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but the diagonal block is empty: re-preallocate it, while
       preserving the nonew setting that re-preallocation would overwrite */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1758 
1759 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1760 {
1761   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1766   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1767   if (d) {
1768     PetscInt rstart;
1769     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1770     *d += rstart/A->rmap->bs;
1771 
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1777 {
1778   PetscFunctionBegin;
1779   *a = ((Mat_MPISBAIJ*)A->data)->A;
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 /* -------------------------------------------------------------------*/
/* Function dispatch table for MATMPISBAIJ. Entries are positional: the
   interspersed slot-number comments index into struct _MatOps (matimpl.h).
   A 0 entry means the operation is not implemented for this type. Do not
   reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       MatConvert_MPISBAIJ_Basic,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1930 
1931 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1932 {
1933   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       i,mbs,Mbs;
1936   PetscMPIInt    size;
1937 
1938   PetscFunctionBegin;
1939   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1940   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1941   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1942   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1943   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1944   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1945 
1946   mbs = B->rmap->n/bs;
1947   Mbs = B->rmap->N/bs;
1948   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1949 
1950   B->rmap->bs = bs;
1951   b->bs2      = bs*bs;
1952   b->mbs      = mbs;
1953   b->Mbs      = Mbs;
1954   b->nbs      = B->cmap->n/bs;
1955   b->Nbs      = B->cmap->N/bs;
1956 
1957   for (i=0; i<=b->size; i++) {
1958     b->rangebs[i] = B->rmap->range[i]/bs;
1959   }
1960   b->rstartbs = B->rmap->rstart/bs;
1961   b->rendbs   = B->rmap->rend/bs;
1962 
1963   b->cstartbs = B->cmap->rstart/bs;
1964   b->cendbs   = B->cmap->rend/bs;
1965 
1966 #if defined(PETSC_USE_CTABLE)
1967   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
1968 #else
1969   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
1970 #endif
1971   ierr = PetscFree(b->garray);CHKERRQ(ierr);
1972   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
1973   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
1974   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
1975   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
1976   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
1977   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
1978   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
1979   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
1980 
1981   /* Because the B will have been resized we simply destroy it and create a new one each time */
1982   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1983   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
1984   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1985   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
1986   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1987   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1988 
1989   if (!B->preallocated) {
1990     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1991     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1992     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1993     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1994     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1995   }
1996 
1997   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1998   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1999 
2000   B->preallocated  = PETSC_TRUE;
2001   B->was_assembled = PETSC_FALSE;
2002   B->assembled     = PETSC_FALSE;
2003   PetscFunctionReturn(0);
2004 }
2005 
/*
   MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills the
   matrix from block-CSR input for this process's block rows.

   ii[] - block row pointers (length mbs+1, ii[0] must be 0)
   jj[] - global block column indices for each block row
   V    - optional values, bs*bs scalars per block; may be NULL

   Strictly-lower-triangular blocks in the input are excluded from the
   preallocation counts and silently dropped during insertion via
   MAT_IGNORE_LOWER_TRIANGULAR.

   NOTE(review): the bd/d counting loops assume the block column indices of
   each row are sorted in ascending order -- confirm against callers.
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;      /* number of local block rows */
  rstart = B->rmap->rstart/bs; /* first global block row owned here */
  cend   = B->cmap->rend/bs;   /* end of the diagonal portion's block columns */

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i]; /* blocks in this block row */
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ     = jj + ii[i];
    bd     = 0; /* bd = blocks strictly below the diagonal (will be dropped) */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d  = 0; /* d = upper-triangular blocks falling in the diagonal portion */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd; /* the rest land in the off-diagonal portion */
    nz       = nz - bd;
    nz_max = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* with no values supplied, insert zeros so the pattern is still established */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all inserted entries are locally owned, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2084 
2085 /*MC
2086    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2087    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2088    the matrix is stored.
2089 
2090    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2091    can call MatSetOption(Mat, MAT_HERMITIAN);
2092 
2093    Options Database Keys:
2094 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2095 
2096    Notes:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2099 
2100    Level: beginner
2101 
2102 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2103 M*/
2104 
/*
   MatCreate_MPISBAIJ - Constructor for the MATMPISBAIJ type: installs the
   dispatch table, creates the stash used for off-process entries, zeroes
   all cached communication/work structures, registers the type-specific
   methods, and marks the matrix as symmetric by construction.

   NOTE(review): "-mat_use_hash_table" is read twice below -- once as a Bool
   and, when true, again as a Real for the table growth factor; confirm this
   double query of the same key is intended.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* type-specific methods, reachable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ stores only the upper triangle, so symmetry is structural and permanent */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  /* complex SBAIJ is symmetric, not Hermitian, unless MAT_HERMITIAN is set later */
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;
#else
  B->hermitian                  = PETSC_TRUE;
  B->hermitian_set              = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2209 
2210 /*MC
2211    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2212 
2213    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2214    and MATMPISBAIJ otherwise.
2215 
2216    Options Database Keys:
2217 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2218 
2219   Level: beginner
2220 
2221 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2222 M*/
2223 
2224 /*@C
2225    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2226    the user should preallocate the matrix storage by setting the parameters
2227    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2228    performance can be increased by more than a factor of 50.
2229 
2230    Collective on Mat
2231 
2232    Input Parameters:
2233 +  B - the matrix
2234 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2235           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2236 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2237            submatrix  (same for all local rows)
2238 .  d_nnz - array containing the number of block nonzeros in the various block rows
2239            in the upper triangular and diagonal part of the diagonal portion of the local
2240            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2241            for the diagonal entry and set a value even if it is zero.
2242 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2243            submatrix (same for all local rows).
2244 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2245            off-diagonal portion of the local submatrix that is right of the diagonal
2246            (possibly different for each block row) or NULL.
2247 
2248 
2249    Options Database Keys:
2250 +   -mat_no_unroll - uses code that does not unroll the loops in the
2251                      block calculations (much slower)
2252 -   -mat_block_size - size of the blocks to use
2253 
2254    Notes:
2255 
2256    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2257    then it must be used on all processors that share the object for that argument.
2258 
2259    If the *_nnz parameter is given then the *_nz parameter is ignored
2260 
2261    Storage Information:
2262    For a square global matrix we define each processor's diagonal portion
2263    to be its local rows and the corresponding columns (a square submatrix);
2264    each processor's off-diagonal portion encompasses the remainder of the
2265    local matrix (a rectangular submatrix).
2266 
2267    The user can specify preallocated storage for the diagonal part of
2268    the local submatrix with either d_nz or d_nnz (not both).  Set
2269    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2270    memory allocation.  Likewise, specify preallocated storage for the
2271    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2272 
2273    You can call MatGetInfo() to get information on how effective the preallocation was;
2274    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2275    You can also run with the option -info and look for messages with the string
2276    malloc in them to see if additional memory allocation was needed.
2277 
2278    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2279    the figure below we depict these three local rows and all columns (0-11).
2280 
2281 .vb
2282            0 1 2 3 4 5 6 7 8 9 10 11
2283           --------------------------
2284    row 3  |. . . d d d o o o o  o  o
2285    row 4  |. . . d d d o o o o  o  o
2286    row 5  |. . . d d d o o o o  o  o
2287           --------------------------
2288 .ve
2289 
2290    Thus, any entries in the d locations are stored in the d (diagonal)
2291    submatrix, and any entries in the o locations are stored in the
2292    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2293    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2294 
2295    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2296    plus the diagonal part of the d matrix,
2297    and o_nz should indicate the number of block nonzeros per row in the o matrix
2298 
2299    In general, for PDE problems in which most nonzeros are near the diagonal,
2300    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2301    or you will get TERRIBLE performance; see the users' manual chapter on
2302    matrices.
2303 
2304    Level: intermediate
2305 
2306 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2307 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate arguments collectively; bs must be the same on every rank sharing B */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation registered as
     "MatMPISBAIJSetPreallocation_C"; PetscTryMethod() is a no-op for matrix
     types that do not provide it, so this is safe to call on any Mat type. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2319 
2320 /*@C
2321    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2322    (block compressed row).  For good matrix assembly performance
2323    the user should preallocate the matrix storage by setting the parameters
2324    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2325    performance can be increased by more than a factor of 50.
2326 
2327    Collective
2328 
2329    Input Parameters:
2330 +  comm - MPI communicator
2331 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2332           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2333 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2334            This value should be the same as the local size used in creating the
2335            y vector for the matrix-vector product y = Ax.
2336 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2337            This value should be the same as the local size used in creating the
2338            x vector for the matrix-vector product y = Ax.
2339 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2340 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2341 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2342            submatrix  (same for all local rows)
2343 .  d_nnz - array containing the number of block nonzeros in the various block rows
2344            in the upper triangular portion of the diagonal portion of the local
2345            submatrix (possibly different for each block row) or NULL.
2346            If you plan to factor the matrix you must leave room for the diagonal entry and
2347            set its value even if it is zero.
2348 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2349            submatrix (same for all local rows).
2350 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2351            off-diagonal portion of the local submatrix (possibly different for
2352            each block row) or NULL.
2353 
2354    Output Parameter:
2355 .  A - the matrix
2356 
2357    Options Database Keys:
2358 +   -mat_no_unroll - uses code that does not unroll the loops in the
2359                      block calculations (much slower)
2360 .   -mat_block_size - size of the blocks to use
2361 -   -mat_mpi - use the parallel matrix data structures even on one processor
2362                (defaults to using SeqBAIJ format on one processor)
2363 
2364    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2365    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2366    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2367 
2368    Notes:
2369    The number of rows and columns must be divisible by blocksize.
2370    This matrix type does not support complex Hermitian operation.
2371 
2372    The user MUST specify either the local or global matrix dimensions
2373    (possibly both).
2374 
2375    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2376    then it must be used on all processors that share the object for that argument.
2377 
2378    If the *_nnz parameter is given then the *_nz parameter is ignored
2379 
2380    Storage Information:
2381    For a square global matrix we define each processor's diagonal portion
2382    to be its local rows and the corresponding columns (a square submatrix);
2383    each processor's off-diagonal portion encompasses the remainder of the
2384    local matrix (a rectangular submatrix).
2385 
2386    The user can specify preallocated storage for the diagonal part of
2387    the local submatrix with either d_nz or d_nnz (not both).  Set
2388    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2389    memory allocation.  Likewise, specify preallocated storage for the
2390    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2391 
2392    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2393    the figure below we depict these three local rows and all columns (0-11).
2394 
2395 .vb
2396            0 1 2 3 4 5 6 7 8 9 10 11
2397           --------------------------
2398    row 3  |. . . d d d o o o o  o  o
2399    row 4  |. . . d d d o o o o  o  o
2400    row 5  |. . . d d d o o o o  o  o
2401           --------------------------
2402 .ve
2403 
2404    Thus, any entries in the d locations are stored in the d (diagonal)
2405    submatrix, and any entries in the o locations are stored in the
2406    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2407    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2408 
2409    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2410    plus the diagonal part of the d matrix,
2411    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2412    In general, for PDE problems in which most nonzeros are near the diagonal,
2413    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2414    or you will get TERRIBLE performance; see the users' manual chapter on
2415    matrices.
2416 
2417    Level: intermediate
2418 
2419 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2420 @*/
2421 
2422 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2423 {
2424   PetscErrorCode ierr;
2425   PetscMPIInt    size;
2426 
2427   PetscFunctionBegin;
2428   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2429   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2430   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2431   if (size > 1) {
2432     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2433     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2434   } else {
2435     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2436     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2437   }
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 
/* Duplicate an MPISBAIJ matrix: creates a new Mat of the same type/layout and
   copies (or merely allocates, depending on cpvalues) the diagonal (A) and
   off-diagonal (B) sequential blocks, the column map, the communication
   vectors/scatters, and the bookkeeping fields of the Mat_MPISBAIJ struct. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* Create the shell of the new matrix with identical sizes, type and layouts */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* The duplicate is marked preallocated/assembled up front; the data is
     filled in by the MatDuplicate() calls on A and B below */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy scalar bookkeeping fields (block sizes and block counts) */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* Per-rank ownership info is copied; per-MatGetRow() scratch state is reset */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  /* only the flag and factor are copied; the table itself is rebuilt lazily */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* NOTE(review): copies size+2 entries of rangebs — confirm rangebs really
     holds size+2 entries (one past the usual size+1 ownership ranges) */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    /* dense global-to-local column map: one entry per global block column */
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray maps compacted B columns to global block columns; its length is the
     number of block columns actually present in B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* Recreate the stash and duplicate the communication vectors/scatter */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the front (local, bs*mbs
     entries) and back (ghost) parts of slvec1/slvec0 — no data is copied */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* share the old scatter by reference counting instead of deep-copying it */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* Duplicate the sequential diagonal (A) and off-diagonal (B) blocks;
     cpvalues controls whether the numerical values are copied */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2548 
2549 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2550 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2551 
2552 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2553 {
2554   PetscErrorCode ierr;
2555   PetscBool      isbinary;
2556 
2557   PetscFunctionBegin;
2558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2559   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2560   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 /*XXXXX@
2565    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2566 
2567    Input Parameters:
2568 .  mat  - the matrix
2569 .  fact - factor
2570 
2571    Not Collective on Mat, each process can have a different hash factor
2572 
2573    Level: advanced
2574 
2575   Notes:
2576    This can also be set by the command line option: -mat_use_hash_table fact
2577 
2578 .seealso: MatSetOption()
2579 @XXXXX*/
2580 
2581 
/* Compute v[row] = max_j |A[row][j]| for a symmetric (upper-triangle-stored)
   MPISBAIJ matrix.  The diagonal block A contributes via MatGetRowMaxAbs();
   entries of the off-diagonal block B contribute both to the local row maxima
   and — because of symmetry — to the rows owned by higher-ranked processes,
   which are informed via point-to-point MPI messages. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  /* returning the column index of the max (idx) is not implemented */
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start with the row maxima of the diagonal block */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* values of B, stored block by block */
  bi  = b->i;   /* block-row pointers of B */
  bj  = b->j;   /* compacted block-column indices of B */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  /* work[gcol] accumulates the column maxima contributed by this rank's B */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B */
  /* the last rank stores no columns to its right, so it has nothing to scan */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): this assumes B's compacted columns map to the
             contiguous global columns starting at rowners_bs[rank+1];
             confirm against garray-based mapping used elsewhere */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            /* update this rank's own row maximum ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and, by symmetry, the maximum of the owning row of 'col' */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    /* each higher-ranked process receives the slice of 'work' it owns */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  /* every rank except 0 receives one contribution from each lower rank and
     folds it into its local row maxima; order of arrival does not matter */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2663 
/* SOR/SSOR relaxation for MPISBAIJ matrices (block size 1 only).
   Because only the upper triangle is stored, the off-process lower-triangular
   contribution B^T*x is obtained by scattering slvec0 = [x; -] into
   slvec1 = [bb; 0] through the symmetric scatter sMvctx, then applying B.
   Supports local symmetric sweeps, single local forward/backward sweeps with
   zero initial guess, and the Eisenstat trick; everything else errors out. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  /* apply-upper is delegated entirely to the sequential diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep can run directly on the diagonal block since x = 0 */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate so the scatter below effectively subtracts B^T*xx and B*xx */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* slvec1 += scattered slvec0: assembles bb - B^T*xx in slvec1a and
         -xx ghost values in slvec1b */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    /* a single zero-guess forward sweep needs no off-process data */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    /* likewise for a single zero-guess backward sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* work vectors are created once and cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): MatMultDiagonalBlock writes into bb1 but the AYPX below
         updates slvec1a — confirm this pairing is intentional */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale = -1 when omega = 1, so the fused loop avoids one multiply */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2795 
2796 /*@
2797      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2798          CSR format the local rows.
2799 
2800    Collective
2801 
2802    Input Parameters:
2803 +  comm - MPI communicator
2804 .  bs - the block size, only a block size of 1 is supported
2805 .  m - number of local rows (Cannot be PETSC_DECIDE)
2806 .  n - This value should be the same as the local size used in creating the
2807        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2808        calculated if N is given) For square matrices n is almost always m.
2809 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2810 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2811 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2812 .   j - column indices
2813 -   a - matrix values
2814 
2815    Output Parameter:
2816 .   mat - the matrix
2817 
2818    Level: intermediate
2819 
2820    Notes:
2821        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2822      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2823      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2824 
2825        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2826 
2827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2828           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2829 @*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;


  PetscFunctionBegin;
  /* Validate the CSR row pointer and the local row count before creating the Mat */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  /* copies i/j/a into the internal storage; the caller's arrays are not kept */
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2844 
2845 
2846 /*@C
2847    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2848 
2849    Collective
2850 
2851    Input Parameters:
2852 +  B - the matrix
2853 .  bs - the block size
2854 .  i - the indices into j for the start of each local row (starts with zero)
2855 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2856 -  v - optional values in the matrix
2857 
2858    Level: advanced
2859 
2860    Notes:
2861    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2862    and usually the numerical values as well
2863 
2864    Any entries below the diagonal are ignored
2865 
2866 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2867 @*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation registered as
     "MatMPISBAIJSetPreallocationCSR_C"; a no-op for matrix types that
     do not provide it. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2876 
2877 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2878 {
2879   PetscErrorCode ierr;
2880   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2881   PetscInt       *indx;
2882   PetscScalar    *values;
2883 
2884   PetscFunctionBegin;
2885   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2886   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2887     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2888     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2889     PetscInt       *bindx,rmax=a->rmax,j;
2890     PetscMPIInt    rank,size;
2891 
2892     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2893     mbs = m/bs; Nbs = N/cbs;
2894     if (n == PETSC_DECIDE) {
2895       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2896     }
2897     nbs = n/cbs;
2898 
2899     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2900     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2901 
2902     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2903     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2904     if (rank == size-1) {
2905       /* Check sum(nbs) = Nbs */
2906       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2907     }
2908 
2909     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2910     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2911     for (i=0; i<mbs; i++) {
2912       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2913       nnz  = nnz/bs;
2914       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2915       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2916       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2917     }
2918     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2919     ierr = PetscFree(bindx);CHKERRQ(ierr);
2920 
2921     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2922     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2923     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2924     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
2925     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
2926     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2927     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2928   }
2929 
2930   /* numeric phase */
2931   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2932   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2933 
2934   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2935   for (i=0; i<m; i++) {
2936     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2937     Ii   = i + rstart;
2938     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2939     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2940   }
2941   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2942   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2944   PetscFunctionReturn(0);
2945 }
2946