xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 068661f951f5e7dd3018fdfb6d1e43b577750df8)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
25   PetscValidLogicalCollectiveInt(B,nm,2);
26   for (i = 0; i < nm; i++) {
27     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
28     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
29     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
30   }
31   PetscValidLogicalCollectiveBool(B,fill,5);
32   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
33   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
34   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
35   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
36   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
37   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
38   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
39   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
40   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
41   for (r = rstart; r < rend; ++r) {
42     PetscInt          ncols;
43     const PetscInt    *row;
44     const PetscScalar *vals;
45 
46     for (i = 0; i < nm; i++) {
47       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
48       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
49       if (symm && symm[i]) {
50         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
51       }
52       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
53     }
54   }
55   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
56   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
57   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
58   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
62 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
63 {
64   Mat            B;
65   PetscErrorCode ierr;
66   PetscInt       r;
67 
68   PetscFunctionBegin;
69   if (reuse != MAT_REUSE_MATRIX) {
70     PetscBool symm = PETSC_TRUE,isdense;
71     PetscInt  bs;
72 
73     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
74     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
75     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
76     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
77     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
78     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
79     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
80     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
81     if (!isdense) {
82       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
84       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
85     } else {
86       ierr = MatSetUp(B);CHKERRQ(ierr);
87     }
88   } else {
89     B    = *newmat;
90     ierr = MatZeroEntries(B);CHKERRQ(ierr);
91   }
92 
93   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
94   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
95     PetscInt          ncols;
96     const PetscInt    *row;
97     const PetscScalar *vals;
98 
99     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
100     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
101 #if defined(PETSC_USE_COMPLEX)
102     if (A->hermitian) {
103       PetscInt i;
104       for (i = 0; i < ncols; i++) {
105         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
106       }
107     } else {
108       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109     }
110 #else
111     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
112 #endif
113     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
114   }
115   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
116   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
117   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
118 
119   if (reuse == MAT_INPLACE_MATRIX) {
120     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
121   } else {
122     *newmat = B;
123   }
124   PetscFunctionReturn(0);
125 }
126 
127 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
128 {
129   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
150   { \
151     brow = row/bs;  \
152     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
153     rmax = aimax[brow]; nrow = ailen[brow]; \
154     bcol = col/bs; \
155     ridx = row % bs; cidx = col % bs; \
156     low  = 0; high = nrow; \
157     while (high-low > 3) { \
158       t = (low+high)/2; \
159       if (rp[t] > bcol) high = t; \
160       else              low  = t; \
161     } \
162     for (_i=low; _i<high; _i++) { \
163       if (rp[_i] > bcol) break; \
164       if (rp[_i] == bcol) { \
165         bap = ap + bs2*_i + bs*cidx + ridx; \
166         if (addv == ADD_VALUES) *bap += value;  \
167         else                    *bap  = value;  \
168         goto a_noinsert; \
169       } \
170     } \
171     if (a->nonew == 1) goto a_noinsert; \
172     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
173     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
174     N = nrow++ - 1;  \
175     /* shift up all the later entries in this row */ \
176     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
177     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
178     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
179     rp[_i]                      = bcol;  \
180     ap[bs2*_i + bs*cidx + ridx] = value;  \
181     A->nonzerostate++;\
182 a_noinsert:; \
183     ailen[brow] = nrow; \
184   }
185 
186 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
187   { \
188     brow = row/bs;  \
189     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
190     rmax = bimax[brow]; nrow = bilen[brow]; \
191     bcol = col/bs; \
192     ridx = row % bs; cidx = col % bs; \
193     low  = 0; high = nrow; \
194     while (high-low > 3) { \
195       t = (low+high)/2; \
196       if (rp[t] > bcol) high = t; \
197       else              low  = t; \
198     } \
199     for (_i=low; _i<high; _i++) { \
200       if (rp[_i] > bcol) break; \
201       if (rp[_i] == bcol) { \
202         bap = ap + bs2*_i + bs*cidx + ridx; \
203         if (addv == ADD_VALUES) *bap += value;  \
204         else                    *bap  = value;  \
205         goto b_noinsert; \
206       } \
207     } \
208     if (b->nonew == 1) goto b_noinsert; \
209     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
210     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
211     N = nrow++ - 1;  \
212     /* shift up all the later entries in this row */ \
213     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
214     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
215     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
216     rp[_i]                      = bcol;  \
217     ap[bs2*_i + bs*cidx + ridx] = value;  \
218     B->nonzerostate++;\
219 b_noinsert:; \
220     bilen[brow] = nrow; \
221   }
222 
223 /* Only add/insert a(i,j) with i<=j (blocks).
224    Any a(i,j) with i>j input by user is ingored or generates an error
225 */
226 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
227 {
228   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
229   MatScalar      value;
230   PetscBool      roworiented = baij->roworiented;
231   PetscErrorCode ierr;
232   PetscInt       i,j,row,col;
233   PetscInt       rstart_orig=mat->rmap->rstart;
234   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
235   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
236 
237   /* Some Variables required in the macro */
238   Mat          A     = baij->A;
239   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
240   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
241   MatScalar    *aa   =a->a;
242 
243   Mat         B     = baij->B;
244   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
245   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
246   MatScalar   *ba   =b->a;
247 
248   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
249   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
250   MatScalar *ap,*bap;
251 
252   /* for stash */
253   PetscInt  n_loc, *in_loc = NULL;
254   MatScalar *v_loc = NULL;
255 
256   PetscFunctionBegin;
257   if (!baij->donotstash) {
258     if (n > baij->n_loc) {
259       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
260       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
261       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
262       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
263 
264       baij->n_loc = n;
265     }
266     in_loc = baij->in_loc;
267     v_loc  = baij->v_loc;
268   }
269 
270   for (i=0; i<m; i++) {
271     if (im[i] < 0) continue;
272     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
273     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
274       row = im[i] - rstart_orig;              /* local row index */
275       for (j=0; j<n; j++) {
276         if (im[i]/bs > in[j]/bs) {
277           if (a->ignore_ltriangular) {
278             continue;    /* ignore lower triangular blocks */
279           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
280         }
281         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
282           col  = in[j] - cstart_orig;         /* local col index */
283           brow = row/bs; bcol = col/bs;
284           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
285           if (roworiented) value = v[i*n+j];
286           else             value = v[i+j*m];
287           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
288           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
289         } else if (in[j] < 0) continue;
290         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291         else {  /* off-diag entry (B) */
292           if (mat->was_assembled) {
293             if (!baij->colmap) {
294               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
295             }
296 #if defined(PETSC_USE_CTABLE)
297             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
298             col  = col - 1;
299 #else
300             col = baij->colmap[in[j]/bs] - 1;
301 #endif
302             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
303               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
304               col  =  in[j];
305               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
306               B    = baij->B;
307               b    = (Mat_SeqBAIJ*)(B)->data;
308               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
309               ba   = b->a;
310             } else col += in[j]%bs;
311           } else col = in[j];
312           if (roworiented) value = v[i*n+j];
313           else             value = v[i+j*m];
314           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
315           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
316         }
317       }
318     } else {  /* off processor entry */
319       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
320       if (!baij->donotstash) {
321         mat->assembled = PETSC_FALSE;
322         n_loc          = 0;
323         for (j=0; j<n; j++) {
324           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
325           in_loc[n_loc] = in[j];
326           if (roworiented) {
327             v_loc[n_loc] = v[i*n+j];
328           } else {
329             v_loc[n_loc] = v[j*m+i];
330           }
331           n_loc++;
332         }
333         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
334       }
335     }
336   }
337   PetscFunctionReturn(0);
338 }
339 
340 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
341 {
342   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
343   PetscErrorCode    ierr;
344   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
345   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
346   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
347   PetscBool         roworiented=a->roworiented;
348   const PetscScalar *value     = v;
349   MatScalar         *ap,*aa = a->a,*bap;
350 
351   PetscFunctionBegin;
352   if (col < row) {
353     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
354     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
355   }
356   rp   = aj + ai[row];
357   ap   = aa + bs2*ai[row];
358   rmax = imax[row];
359   nrow = ailen[row];
360   value = v;
361   low   = 0;
362   high  = nrow;
363 
364   while (high-low > 7) {
365     t = (low+high)/2;
366     if (rp[t] > col) high = t;
367     else             low  = t;
368   }
369   for (i=low; i<high; i++) {
370     if (rp[i] > col) break;
371     if (rp[i] == col) {
372       bap = ap +  bs2*i;
373       if (roworiented) {
374         if (is == ADD_VALUES) {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] += *value++;
378             }
379           }
380         } else {
381           for (ii=0; ii<bs; ii++) {
382             for (jj=ii; jj<bs2; jj+=bs) {
383               bap[jj] = *value++;
384             }
385           }
386         }
387       } else {
388         if (is == ADD_VALUES) {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++ += *value++;
392             }
393           }
394         } else {
395           for (ii=0; ii<bs; ii++) {
396             for (jj=0; jj<bs; jj++) {
397               *bap++  = *value++;
398             }
399           }
400         }
401       }
402       goto noinsert2;
403     }
404   }
405   if (nonew == 1) goto noinsert2;
406   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
407   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
408   N = nrow++ - 1; high++;
409   /* shift up all the later entries in this row */
410   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
411   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
412   rp[i] = col;
413   bap   = ap +  bs2*i;
414   if (roworiented) {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=ii; jj<bs2; jj+=bs) {
417         bap[jj] = *value++;
418       }
419     }
420   } else {
421     for (ii=0; ii<bs; ii++) {
422       for (jj=0; jj<bs; jj++) {
423         *bap++ = *value++;
424       }
425     }
426   }
427   noinsert2:;
428   ailen[row] = nrow;
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433    This routine is exactly duplicated in mpibaij.c
434 */
435 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
436 {
437   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
438   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
439   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
440   PetscErrorCode    ierr;
441   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
442   PetscBool         roworiented=a->roworiented;
443   const PetscScalar *value     = v;
444   MatScalar         *ap,*aa = a->a,*bap;
445 
446   PetscFunctionBegin;
447   rp   = aj + ai[row];
448   ap   = aa + bs2*ai[row];
449   rmax = imax[row];
450   nrow = ailen[row];
451   low  = 0;
452   high = nrow;
453   value = v;
454   while (high-low > 7) {
455     t = (low+high)/2;
456     if (rp[t] > col) high = t;
457     else             low  = t;
458   }
459   for (i=low; i<high; i++) {
460     if (rp[i] > col) break;
461     if (rp[i] == col) {
462       bap = ap +  bs2*i;
463       if (roworiented) {
464         if (is == ADD_VALUES) {
465           for (ii=0; ii<bs; ii++) {
466             for (jj=ii; jj<bs2; jj+=bs) {
467               bap[jj] += *value++;
468             }
469           }
470         } else {
471           for (ii=0; ii<bs; ii++) {
472             for (jj=ii; jj<bs2; jj+=bs) {
473               bap[jj] = *value++;
474             }
475           }
476         }
477       } else {
478         if (is == ADD_VALUES) {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj] += value[jj];
482             }
483             bap += bs;
484           }
485         } else {
486           for (ii=0; ii<bs; ii++,value+=bs) {
487             for (jj=0; jj<bs; jj++) {
488               bap[jj]  = value[jj];
489             }
490             bap += bs;
491           }
492         }
493       }
494       goto noinsert2;
495     }
496   }
497   if (nonew == 1) goto noinsert2;
498   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
499   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
500   N = nrow++ - 1; high++;
501   /* shift up all the later entries in this row */
502   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
503   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
504   rp[i] = col;
505   bap   = ap +  bs2*i;
506   if (roworiented) {
507     for (ii=0; ii<bs; ii++) {
508       for (jj=ii; jj<bs2; jj+=bs) {
509         bap[jj] = *value++;
510       }
511     }
512   } else {
513     for (ii=0; ii<bs; ii++) {
514       for (jj=0; jj<bs; jj++) {
515         *bap++ = *value++;
516       }
517     }
518   }
519   noinsert2:;
520   ailen[row] = nrow;
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This routine could be optimized by removing the need for the block copy below and passing stride information
526   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
527 */
528 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
529 {
530   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
531   const MatScalar *value;
532   MatScalar       *barray     =baij->barray;
533   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
534   PetscErrorCode  ierr;
535   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
536   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
537   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
538 
539   PetscFunctionBegin;
540   if (!barray) {
541     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
542     baij->barray = barray;
543   }
544 
545   if (roworiented) {
546     stepval = (n-1)*bs;
547   } else {
548     stepval = (m-1)*bs;
549   }
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
553     if (im[i] >= rstart && im[i] < rend) {
554       row = im[i] - rstart;
555       for (j=0; j<n; j++) {
556         if (im[i] > in[j]) {
557           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
558           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
559         }
560         /* If NumCol = 1 then a copy is not required */
561         if ((roworiented) && (n == 1)) {
562           barray = (MatScalar*) v + i*bs2;
563         } else if ((!roworiented) && (m == 1)) {
564           barray = (MatScalar*) v + j*bs2;
565         } else { /* Here a copy is required */
566           if (roworiented) {
567             value = v + i*(stepval+bs)*bs + j*bs;
568           } else {
569             value = v + j*(stepval+bs)*bs + i*bs;
570           }
571           for (ii=0; ii<bs; ii++,value+=stepval) {
572             for (jj=0; jj<bs; jj++) {
573               *barray++ = *value++;
574             }
575           }
576           barray -=bs2;
577         }
578 
579         if (in[j] >= cstart && in[j] < cend) {
580           col  = in[j] - cstart;
581           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
582         } else if (in[j] < 0) continue;
583         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584         else {
585           if (mat->was_assembled) {
586             if (!baij->colmap) {
587               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 
590 #if defined(PETSC_USE_DEBUG)
591 #if defined(PETSC_USE_CTABLE)
592             { PetscInt data;
593               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
594               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
595             }
596 #else
597             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
598 #endif
599 #endif
600 #if defined(PETSC_USE_CTABLE)
601             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
602             col  = (col - 1)/bs;
603 #else
604             col = (baij->colmap[in[j]] - 1)/bs;
605 #endif
606             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
607               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
608               col  = in[j];
609             }
610           } else col = in[j];
611           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!baij->donotstash) {
617         if (roworiented) {
618           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
670 {
671   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
672   PetscErrorCode ierr;
673   PetscReal      sum[2],*lnorm2;
674 
675   PetscFunctionBegin;
676   if (baij->size == 1) {
677     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
678   } else {
679     if (type == NORM_FROBENIUS) {
680       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
681       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
682       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
683       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
684       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
685       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
686       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
687       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
688     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
689       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
690       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
691       PetscReal    *rsum,*rsum2,vabs;
692       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
693       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
694       MatScalar    *v;
695 
696       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
697       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
698       /* Amat */
699       v = amat->a; jj = amat->j;
700       for (brow=0; brow<mbs; brow++) {
701         grow = bs*(rstart + brow);
702         nz   = amat->i[brow+1] - amat->i[brow];
703         for (bcol=0; bcol<nz; bcol++) {
704           gcol = bs*(rstart + *jj); jj++;
705           for (col=0; col<bs; col++) {
706             for (row=0; row<bs; row++) {
707               vabs            = PetscAbsScalar(*v); v++;
708               rsum[gcol+col] += vabs;
709               /* non-diagonal block */
710               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
711             }
712           }
713         }
714         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
715       }
716       /* Bmat */
717       v = bmat->a; jj = bmat->j;
718       for (brow=0; brow<mbs; brow++) {
719         grow = bs*(rstart + brow);
720         nz = bmat->i[brow+1] - bmat->i[brow];
721         for (bcol=0; bcol<nz; bcol++) {
722           gcol = bs*garray[*jj]; jj++;
723           for (col=0; col<bs; col++) {
724             for (row=0; row<bs; row++) {
725               vabs            = PetscAbsScalar(*v); v++;
726               rsum[gcol+col] += vabs;
727               rsum[grow+row] += vabs;
728             }
729           }
730         }
731         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
732       }
733       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734       *norm = 0.0;
735       for (col=0; col<mat->cmap->N; col++) {
736         if (rsum2[col] > *norm) *norm = rsum2[col];
737       }
738       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
739     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       nstash,reallocs;
749 
750   PetscFunctionBegin;
751   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752 
753   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
757   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
758   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
763 {
764   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
765   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
766   PetscErrorCode ierr;
767   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
768   PetscInt       *row,*col;
769   PetscBool      other_disassembled;
770   PetscMPIInt    n;
771   PetscBool      r1,r2,r3;
772   MatScalar      *val;
773 
774   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
775   PetscFunctionBegin;
776   if (!baij->donotstash &&  !mat->nooffprocentries) {
777     while (1) {
778       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
779       if (!flg) break;
780 
781       for (i=0; i<n;) {
782         /* Now identify the consecutive vals belonging to the same row */
783         for (j=i,rstart=row[j]; j<n; j++) {
784           if (row[j] != rstart) break;
785         }
786         if (j < n) ncols = j-i;
787         else       ncols = n-i;
788         /* Now assemble all these values with a single function call */
789         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
790         i    = j;
791       }
792     }
793     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
794     /* Now process the block-stash. Since the values are stashed column-oriented,
795        set the roworiented flag to column oriented, and after MatSetValues()
796        restore the original flags */
797     r1 = baij->roworiented;
798     r2 = a->roworiented;
799     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
800 
801     baij->roworiented = PETSC_FALSE;
802     a->roworiented    = PETSC_FALSE;
803 
804     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
805     while (1) {
806       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
807       if (!flg) break;
808 
809       for (i=0; i<n;) {
810         /* Now identify the consecutive vals belonging to the same row */
811         for (j=i,rstart=row[j]; j<n; j++) {
812           if (row[j] != rstart) break;
813         }
814         if (j < n) ncols = j-i;
815         else       ncols = n-i;
816         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
817         i    = j;
818       }
819     }
820     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
821 
822     baij->roworiented = r1;
823     a->roworiented    = r2;
824 
825     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
826   }
827 
828   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
830 
831   /* determine if any processor has disassembled, if so we must
832      also disassemble ourselfs, in order that we may reassemble. */
833   /*
834      if nonzero structure of submatrix B cannot change then we know that
835      no processor disassembled thus we can skip this stuff
836   */
837   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
838     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
839     if (mat->was_assembled && !other_disassembled) {
840       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
841     }
842   }
843 
844   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
845     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
846   }
847   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
848   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
849 
850   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
851 
852   baij->rowvalues = NULL;
853 
854   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
855   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
856     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
857     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
864 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
865 {
866   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
867   PetscErrorCode    ierr;
868   PetscInt          bs   = mat->rmap->bs;
869   PetscMPIInt       rank = baij->rank;
870   PetscBool         iascii,isdraw;
871   PetscViewer       sviewer;
872   PetscViewerFormat format;
873 
874   PetscFunctionBegin;
875   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
876   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
877   if (iascii) {
878     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
879     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
880       MatInfo info;
881       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
882       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
883       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
884       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
885       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
886       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
887       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
889       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
890       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
891       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
892       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
893       PetscFunctionReturn(0);
894     } else if (format == PETSC_VIEWER_ASCII_INFO) {
895       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
896       PetscFunctionReturn(0);
897     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
898       PetscFunctionReturn(0);
899     }
900   }
901 
902   if (isdraw) {
903     PetscDraw draw;
904     PetscBool isnull;
905     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
906     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
907     if (isnull) PetscFunctionReturn(0);
908   }
909 
910   {
911     /* assemble the entire matrix onto first processor. */
912     Mat          A;
913     Mat_SeqSBAIJ *Aloc;
914     Mat_SeqBAIJ  *Bloc;
915     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
916     MatScalar    *a;
917     const char   *matname;
918 
919     /* Should this be the same type as mat? */
920     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
921     if (!rank) {
922       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
923     } else {
924       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
925     }
926     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
927     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
928     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
929     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
930 
931     /* copy over the A part */
932     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
933     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
934     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
935 
936     for (i=0; i<mbs; i++) {
937       rvals[0] = bs*(baij->rstartbs + i);
938       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
939       for (j=ai[i]; j<ai[i+1]; j++) {
940         col = (baij->cstartbs+aj[j])*bs;
941         for (k=0; k<bs; k++) {
942           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
943           col++;
944           a += bs;
945         }
946       }
947     }
948     /* copy over the B part */
949     Bloc = (Mat_SeqBAIJ*)baij->B->data;
950     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
951     for (i=0; i<mbs; i++) {
952 
953       rvals[0] = bs*(baij->rstartbs + i);
954       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
955       for (j=ai[i]; j<ai[i+1]; j++) {
956         col = baij->garray[aj[j]]*bs;
957         for (k=0; k<bs; k++) {
958           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
959           col++;
960           a += bs;
961         }
962       }
963     }
964     ierr = PetscFree(rvals);CHKERRQ(ierr);
965     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     /*
968        Everyone has to call to draw the matrix since the graphics waits are
969        synchronized across all processors that share the PetscDraw object
970     */
971     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
972     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
973     if (!rank) {
974       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
975       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
976     }
977     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
978     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979     ierr = MatDestroy(&A);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986 
987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989   PetscErrorCode ierr;
990   PetscBool      iascii,isdraw,issocket,isbinary;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997   if (iascii || isdraw || issocket) {
998     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999   } else if (isbinary) {
1000     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1006 {
1007   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011 #if defined(PETSC_USE_LOG)
1012   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1013 #endif
1014   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1015   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1016   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1017   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1018 #if defined(PETSC_USE_CTABLE)
1019   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1020 #else
1021   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1022 #endif
1023   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1024   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1025   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1026   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1027   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1028   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1029   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1030   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1031   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1032   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1033   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1034   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1035   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1036   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1037   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1038 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1039   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1040 #endif
1041   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1042   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1043   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1044   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1045 
1046   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1047   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1048   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1049   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1050   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1051 #if defined(PETSC_HAVE_ELEMENTAL)
1052   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1053 #endif
1054 #if defined(PETSC_HAVE_SCALAPACK)
1055   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
1056 #endif
1057   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1058   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1063 {
1064   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1065   PetscErrorCode    ierr;
1066   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1067   PetscScalar       *from;
1068   const PetscScalar *x;
1069 
1070   PetscFunctionBegin;
1071   /* diagonal part */
1072   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1073   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1074 
1075   /* subdiagonal part */
1076   if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1077   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1078 
1079   /* copy x into the vec slvec0 */
1080   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1081   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1082 
1083   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1084   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1085   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1086 
1087   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   /* supperdiagonal part */
1090   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1095 {
1096   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1097   PetscErrorCode    ierr;
1098   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1099   PetscScalar       *from;
1100   const PetscScalar *x;
1101 
1102   PetscFunctionBegin;
1103   /* diagonal part */
1104   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1105   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1106 
1107   /* subdiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1109 
1110   /* copy x into the vec slvec0 */
1111   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1112   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1113 
1114   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1115   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1116   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1117 
1118   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120   /* supperdiagonal part */
1121   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode    ierr;
1129   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1130   PetscScalar       *from,zero=0.0;
1131   const PetscScalar *x;
1132 
1133   PetscFunctionBegin;
1134   /* diagonal part */
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1136   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1137 
1138   /* subdiagonal part */
1139   if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1140   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1141 
1142   /* copy x into the vec slvec0 */
1143   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1144   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1145   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1146   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1147 
1148   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1150   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151 
1152   /* supperdiagonal part */
1153   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1158 {
1159   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode    ierr;
1161   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1162   PetscScalar       *from,zero=0.0;
1163   const PetscScalar *x;
1164 
1165   PetscFunctionBegin;
1166   /* diagonal part */
1167   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1168   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1169 
1170   /* subdiagonal part */
1171   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1172 
1173   /* copy x into the vec slvec0 */
1174   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1175   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1177   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1178 
1179   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1180   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1181   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182 
1183   /* supperdiagonal part */
1184   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /*
1189   This only works correctly for square matrices where the subblock A->A is the
1190    diagonal block
1191 */
1192 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1193 {
1194   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1199   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1204 {
1205   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1210   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1215 {
1216   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1217   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1218   PetscErrorCode ierr;
1219   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1220   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1221   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1222 
1223   PetscFunctionBegin;
1224   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1225   mat->getrowactive = PETSC_TRUE;
1226 
1227   if (!mat->rowvalues && (idx || v)) {
1228     /*
1229         allocate enough space to hold information from the longest row.
1230     */
1231     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1232     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1233     PetscInt     max = 1,mbs = mat->mbs,tmp;
1234     for (i=0; i<mbs; i++) {
1235       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1236       if (max < tmp) max = tmp;
1237     }
1238     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1239   }
1240 
1241   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1242   lrow = row - brstart;  /* local row index */
1243 
1244   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1245   if (!v)   {pvA = NULL; pvB = NULL;}
1246   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1247   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1248   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1249   nztot = nzA + nzB;
1250 
1251   cmap = mat->garray;
1252   if (v  || idx) {
1253     if (nztot) {
1254       /* Sort by increasing column numbers, assuming A and B already sorted */
1255       PetscInt imark = -1;
1256       if (v) {
1257         *v = v_p = mat->rowvalues;
1258         for (i=0; i<nzB; i++) {
1259           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1260           else break;
1261         }
1262         imark = i;
1263         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1264         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1265       }
1266       if (idx) {
1267         *idx = idx_p = mat->rowindices;
1268         if (imark > -1) {
1269           for (i=0; i<imark; i++) {
1270             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1271           }
1272         } else {
1273           for (i=0; i<nzB; i++) {
1274             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1275             else break;
1276           }
1277           imark = i;
1278         }
1279         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1280         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1281       }
1282     } else {
1283       if (idx) *idx = NULL;
1284       if (v)   *v   = NULL;
1285     }
1286   }
1287   *nz  = nztot;
1288   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1289   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1296 
1297   PetscFunctionBegin;
1298   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1299   baij->getrowactive = PETSC_FALSE;
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1306   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1307 
1308   PetscFunctionBegin;
1309   aA->getrow_utriangular = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1315   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1316 
1317   PetscFunctionBegin;
1318   aA->getrow_utriangular = PETSC_FALSE;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1329   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1334 {
1335   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1340   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1345    Input: isrow       - distributed(parallel),
1346           iscol_local - locally owned (seq)
1347 */
1348 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1349 {
1350   PetscErrorCode ierr;
1351   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1352   const PetscInt *ptr1,*ptr2;
1353 
1354   PetscFunctionBegin;
1355   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1356   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1357   if (sz1 > sz2) {
1358     *flg = PETSC_FALSE;
1359     PetscFunctionReturn(0);
1360   }
1361 
1362   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1363   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1364 
1365   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1366   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1367   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1368   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1369   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1370   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1371 
1372   nmatch=0;
1373   k     = 0;
1374   for (i=0; i<sz1; i++){
1375     for (j=k; j<sz2; j++){
1376       if (a1[i] == a2[j]) {
1377         k = j; nmatch++;
1378         break;
1379       }
1380     }
1381   }
1382   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1383   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1384   ierr = PetscFree(a1);CHKERRQ(ierr);
1385   ierr = PetscFree(a2);CHKERRQ(ierr);
1386   if (nmatch < sz1) {
1387     *flg = PETSC_FALSE;
1388   } else {
1389     *flg = PETSC_TRUE;
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1395 {
1396   PetscErrorCode ierr;
1397   IS             iscol_local;
1398   PetscInt       csize;
1399   PetscBool      isequal;
1400 
1401   PetscFunctionBegin;
1402   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1403   if (call == MAT_REUSE_MATRIX) {
1404     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1405     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1406   } else {
1407     PetscBool issorted;
1408 
1409     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1410     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1411     ierr = ISSorted(iscol_local, &issorted);CHKERRQ(ierr);
1412     if (!isequal || !issorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1413   }
1414 
1415   /* now call MatCreateSubMatrix_MPIBAIJ() */
1416   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1417   if (call == MAT_INITIAL_MATRIX) {
1418     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1419     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1425 {
1426   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1427   PetscErrorCode ierr;
1428 
1429   PetscFunctionBegin;
1430   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1431   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1436 {
1437   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1438   Mat            A  = a->A,B = a->B;
1439   PetscErrorCode ierr;
1440   PetscLogDouble isend[5],irecv[5];
1441 
1442   PetscFunctionBegin;
1443   info->block_size = (PetscReal)matin->rmap->bs;
1444 
1445   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1446 
1447   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1448   isend[3] = info->memory;  isend[4] = info->mallocs;
1449 
1450   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1451 
1452   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1453   isend[3] += info->memory;  isend[4] += info->mallocs;
1454   if (flag == MAT_LOCAL) {
1455     info->nz_used      = isend[0];
1456     info->nz_allocated = isend[1];
1457     info->nz_unneeded  = isend[2];
1458     info->memory       = isend[3];
1459     info->mallocs      = isend[4];
1460   } else if (flag == MAT_GLOBAL_MAX) {
1461     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1462 
1463     info->nz_used      = irecv[0];
1464     info->nz_allocated = irecv[1];
1465     info->nz_unneeded  = irecv[2];
1466     info->memory       = irecv[3];
1467     info->mallocs      = irecv[4];
1468   } else if (flag == MAT_GLOBAL_SUM) {
1469     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1470 
1471     info->nz_used      = irecv[0];
1472     info->nz_allocated = irecv[1];
1473     info->nz_unneeded  = irecv[2];
1474     info->memory       = irecv[3];
1475     info->mallocs      = irecv[4];
1476   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1477   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1478   info->fill_ratio_needed = 0;
1479   info->factor_mallocs    = 0;
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1484 {
1485   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1486   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   switch (op) {
1491   case MAT_NEW_NONZERO_LOCATIONS:
1492   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1493   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1494   case MAT_KEEP_NONZERO_PATTERN:
1495   case MAT_SUBMAT_SINGLEIS:
1496   case MAT_NEW_NONZERO_LOCATION_ERR:
1497     MatCheckPreallocated(A,1);
1498     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1499     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1500     break;
1501   case MAT_ROW_ORIENTED:
1502     MatCheckPreallocated(A,1);
1503     a->roworiented = flg;
1504 
1505     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1506     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1507     break;
1508   case MAT_FORCE_DIAGONAL_ENTRIES:
1509   case MAT_SORTED_FULL:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   case MAT_IGNORE_OFF_PROC_ENTRIES:
1513     a->donotstash = flg;
1514     break;
1515   case MAT_USE_HASH_TABLE:
1516     a->ht_flag = flg;
1517     break;
1518   case MAT_HERMITIAN:
1519     MatCheckPreallocated(A,1);
1520     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1521 #if defined(PETSC_USE_COMPLEX)
1522     if (flg) { /* need different mat-vec ops */
1523       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1524       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1525       A->ops->multtranspose    = NULL;
1526       A->ops->multtransposeadd = NULL;
1527       A->symmetric = PETSC_FALSE;
1528     }
1529 #endif
1530     break;
1531   case MAT_SPD:
1532   case MAT_SYMMETRIC:
1533     MatCheckPreallocated(A,1);
1534     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1535 #if defined(PETSC_USE_COMPLEX)
1536     if (flg) { /* restore to use default mat-vec ops */
1537       A->ops->mult             = MatMult_MPISBAIJ;
1538       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1539       A->ops->multtranspose    = MatMult_MPISBAIJ;
1540       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1541     }
1542 #endif
1543     break;
1544   case MAT_STRUCTURALLY_SYMMETRIC:
1545     MatCheckPreallocated(A,1);
1546     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1547     break;
1548   case MAT_SYMMETRY_ETERNAL:
1549     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1550     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1551     break;
1552   case MAT_IGNORE_LOWER_TRIANGULAR:
1553     aA->ignore_ltriangular = flg;
1554     break;
1555   case MAT_ERROR_LOWER_TRIANGULAR:
1556     aA->ignore_ltriangular = flg;
1557     break;
1558   case MAT_GETROW_UPPERTRIANGULAR:
1559     aA->getrow_utriangular = flg;
1560     break;
1561   default:
1562     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1568 {
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   if (reuse == MAT_INITIAL_MATRIX) {
1573     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1574   }  else if (reuse == MAT_REUSE_MATRIX) {
1575     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1581 {
1582   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1583   Mat            a     = baij->A, b=baij->B;
1584   PetscErrorCode ierr;
1585   PetscInt       nv,m,n;
1586   PetscBool      flg;
1587 
1588   PetscFunctionBegin;
1589   if (ll != rr) {
1590     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1591     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1592   }
1593   if (!ll) PetscFunctionReturn(0);
1594 
1595   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1596   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1597 
1598   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1599   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1600 
1601   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602 
1603   /* left diagonalscale the off-diagonal part */
1604   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1605 
1606   /* scale the diagonal part */
1607   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1608 
1609   /* right diagonalscale the off-diagonal part */
1610   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1611   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1616 {
1617   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1618   PetscErrorCode ierr;
1619 
1620   PetscFunctionBegin;
1621   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1626 
1627 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1628 {
1629   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1630   Mat            a,b,c,d;
1631   PetscBool      flg;
1632   PetscErrorCode ierr;
1633 
1634   PetscFunctionBegin;
1635   a = matA->A; b = matA->B;
1636   c = matB->A; d = matB->B;
1637 
1638   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1639   if (flg) {
1640     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1641   }
1642   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1647 {
1648   PetscErrorCode ierr;
1649   PetscBool      isbaij;
1650 
1651   PetscFunctionBegin;
1652   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1653   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1654   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1655   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1656     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1657     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1658     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1659   } else {
1660     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1661     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1662 
1663     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1664     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1665   }
1666   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1671 {
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1680 {
1681   PetscErrorCode ierr;
1682   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1683   PetscBLASInt   bnz,one=1;
1684   Mat_SeqSBAIJ   *xa,*ya;
1685   Mat_SeqBAIJ    *xb,*yb;
1686 
1687   PetscFunctionBegin;
1688   if (str == SAME_NONZERO_PATTERN) {
1689     PetscScalar alpha = a;
1690     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1691     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1692     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1693     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1694     xb   = (Mat_SeqBAIJ*)xx->B->data;
1695     yb   = (Mat_SeqBAIJ*)yy->B->data;
1696     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1697     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1698     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1699   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1700     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1701     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1702     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1703   } else {
1704     Mat      B;
1705     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1706     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1707     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1708     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1709     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1710     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1711     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1712     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1713     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1714     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1715     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1716     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1717     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1718     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1719     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1720     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1721     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1722     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1723     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1724     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1730 {
1731   PetscErrorCode ierr;
1732   PetscInt       i;
1733   PetscBool      flg;
1734 
1735   PetscFunctionBegin;
1736   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1737   for (i=0; i<n; i++) {
1738     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1739     if (!flg) {
1740       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1741     }
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1747 {
1748   PetscErrorCode ierr;
1749   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1750   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1751 
1752   PetscFunctionBegin;
1753   if (!Y->preallocated) {
1754     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1755   } else if (!aij->nz) {
1756     PetscInt nonew = aij->nonew;
1757     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1758     aij->nonew = nonew;
1759   }
1760   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1765 {
1766   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1767   PetscErrorCode ierr;
1768 
1769   PetscFunctionBegin;
1770   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1771   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1772   if (d) {
1773     PetscInt rstart;
1774     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1775     *d += rstart/A->rmap->bs;
1776 
1777   }
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1782 {
1783   PetscFunctionBegin;
1784   *a = ((Mat_MPISBAIJ*)A->data)->A;
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 /* -------------------------------------------------------------------*/
1789 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1790                                        MatGetRow_MPISBAIJ,
1791                                        MatRestoreRow_MPISBAIJ,
1792                                        MatMult_MPISBAIJ,
1793                                /*  4*/ MatMultAdd_MPISBAIJ,
1794                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1795                                        MatMultAdd_MPISBAIJ,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                /* 10*/ NULL,
1800                                        NULL,
1801                                        NULL,
1802                                        MatSOR_MPISBAIJ,
1803                                        MatTranspose_MPISBAIJ,
1804                                /* 15*/ MatGetInfo_MPISBAIJ,
1805                                        MatEqual_MPISBAIJ,
1806                                        MatGetDiagonal_MPISBAIJ,
1807                                        MatDiagonalScale_MPISBAIJ,
1808                                        MatNorm_MPISBAIJ,
1809                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1810                                        MatAssemblyEnd_MPISBAIJ,
1811                                        MatSetOption_MPISBAIJ,
1812                                        MatZeroEntries_MPISBAIJ,
1813                                /* 24*/ NULL,
1814                                        NULL,
1815                                        NULL,
1816                                        NULL,
1817                                        NULL,
1818                                /* 29*/ MatSetUp_MPISBAIJ,
1819                                        NULL,
1820                                        NULL,
1821                                        MatGetDiagonalBlock_MPISBAIJ,
1822                                        NULL,
1823                                /* 34*/ MatDuplicate_MPISBAIJ,
1824                                        NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                /* 39*/ MatAXPY_MPISBAIJ,
1829                                        MatCreateSubMatrices_MPISBAIJ,
1830                                        MatIncreaseOverlap_MPISBAIJ,
1831                                        MatGetValues_MPISBAIJ,
1832                                        MatCopy_MPISBAIJ,
1833                                /* 44*/ NULL,
1834                                        MatScale_MPISBAIJ,
1835                                        MatShift_MPISBAIJ,
1836                                        NULL,
1837                                        NULL,
1838                                /* 49*/ NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                /* 54*/ NULL,
1844                                        NULL,
1845                                        MatSetUnfactored_MPISBAIJ,
1846                                        NULL,
1847                                        MatSetValuesBlocked_MPISBAIJ,
1848                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1849                                        NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                /* 64*/ NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1859                                        NULL,
1860                                        MatConvert_MPISBAIJ_Basic,
1861                                        NULL,
1862                                        NULL,
1863                                /* 74*/ NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        NULL,
1868                                /* 79*/ NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        MatLoad_MPISBAIJ,
1873                                /* 84*/ NULL,
1874                                        NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                /* 89*/ NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                /* 94*/ NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        NULL,
1888                                /* 99*/ NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                /*104*/ NULL,
1894                                        MatRealPart_MPISBAIJ,
1895                                        MatImaginaryPart_MPISBAIJ,
1896                                        MatGetRowUpperTriangular_MPISBAIJ,
1897                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1898                                /*109*/ NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        MatMissingDiagonal_MPISBAIJ,
1903                                /*114*/ NULL,
1904                                        NULL,
1905                                        NULL,
1906                                        NULL,
1907                                        NULL,
1908                                /*119*/ NULL,
1909                                        NULL,
1910                                        NULL,
1911                                        NULL,
1912                                        NULL,
1913                                /*124*/ NULL,
1914                                        NULL,
1915                                        NULL,
1916                                        NULL,
1917                                        NULL,
1918                                /*129*/ NULL,
1919                                        NULL,
1920                                        NULL,
1921                                        NULL,
1922                                        NULL,
1923                                /*134*/ NULL,
1924                                        NULL,
1925                                        NULL,
1926                                        NULL,
1927                                        NULL,
1928                                /*139*/ MatSetBlockSizes_Default,
1929                                        NULL,
1930                                        NULL,
1931                                        NULL,
1932                                        NULL,
1933                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1934 };
1935 
1936 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1937 {
1938   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1939   PetscErrorCode ierr;
1940   PetscInt       i,mbs,Mbs;
1941   PetscMPIInt    size;
1942 
1943   PetscFunctionBegin;
1944   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1945   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1946   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1947   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1948   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1949   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1950 
1951   mbs = B->rmap->n/bs;
1952   Mbs = B->rmap->N/bs;
1953   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1954 
1955   B->rmap->bs = bs;
1956   b->bs2      = bs*bs;
1957   b->mbs      = mbs;
1958   b->Mbs      = Mbs;
1959   b->nbs      = B->cmap->n/bs;
1960   b->Nbs      = B->cmap->N/bs;
1961 
1962   for (i=0; i<=b->size; i++) {
1963     b->rangebs[i] = B->rmap->range[i]/bs;
1964   }
1965   b->rstartbs = B->rmap->rstart/bs;
1966   b->rendbs   = B->rmap->rend/bs;
1967 
1968   b->cstartbs = B->cmap->rstart/bs;
1969   b->cendbs   = B->cmap->rend/bs;
1970 
1971 #if defined(PETSC_USE_CTABLE)
1972   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
1973 #else
1974   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
1975 #endif
1976   ierr = PetscFree(b->garray);CHKERRQ(ierr);
1977   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
1978   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
1979   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
1980   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
1981   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
1982   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
1983   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
1984   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
1985 
1986   /* Because the B will have been resized we simply destroy it and create a new one each time */
1987   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
1988   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
1989   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1990   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
1991   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1992   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1993 
1994   if (!B->preallocated) {
1995     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1996     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1997     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1998     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1999     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2000   }
2001 
2002   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2003   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2004 
2005   B->preallocated  = PETSC_TRUE;
2006   B->was_assembled = PETSC_FALSE;
2007   B->assembled     = PETSC_FALSE;
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2012 {
2013   PetscInt       m,rstart,cend;
2014   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2015   const PetscInt *JJ    =NULL;
2016   PetscScalar    *values=NULL;
2017   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2018   PetscErrorCode ierr;
2019   PetscBool      nooffprocentries;
2020 
2021   PetscFunctionBegin;
2022   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2023   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2024   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2025   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2026   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2027   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2028   m      = B->rmap->n/bs;
2029   rstart = B->rmap->rstart/bs;
2030   cend   = B->cmap->rend/bs;
2031 
2032   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2033   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2034   for (i=0; i<m; i++) {
2035     nz = ii[i+1] - ii[i];
2036     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2037     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2038     JJ     = jj + ii[i];
2039     bd     = 0;
2040     for (j=0; j<nz; j++) {
2041       if (*JJ >= i + rstart) break;
2042       JJ++;
2043       bd++;
2044     }
2045     d  = 0;
2046     for (; j<nz; j++) {
2047       if (*JJ++ >= cend) break;
2048       d++;
2049     }
2050     d_nnz[i] = d;
2051     o_nnz[i] = nz - d - bd;
2052     nz       = nz - bd;
2053     nz_max = PetscMax(nz_max,nz);
2054   }
2055   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2056   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2057   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2058 
2059   values = (PetscScalar*)V;
2060   if (!values) {
2061     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2062   }
2063   for (i=0; i<m; i++) {
2064     PetscInt          row    = i + rstart;
2065     PetscInt          ncols  = ii[i+1] - ii[i];
2066     const PetscInt    *icols = jj + ii[i];
2067     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2068       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2069       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2070     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2071       PetscInt j;
2072       for (j=0; j<ncols; j++) {
2073         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2074         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2075       }
2076     }
2077   }
2078 
2079   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2080   nooffprocentries    = B->nooffprocentries;
2081   B->nooffprocentries = PETSC_TRUE;
2082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   B->nooffprocentries = nooffprocentries;
2085 
2086   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 /*MC
2091    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2092    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2093    the matrix is stored.
2094 
2095    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2096    can call MatSetOption(Mat, MAT_HERMITIAN);
2097 
2098    Options Database Keys:
2099 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2100 
2101    Notes:
2102      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2103      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2104 
2105    Level: beginner
2106 
2107 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2108 M*/
2109 
2110 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2111 {
2112   Mat_MPISBAIJ   *b;
2113   PetscErrorCode ierr;
2114   PetscBool      flg = PETSC_FALSE;
2115 
2116   PetscFunctionBegin;
2117   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2118   B->data = (void*)b;
2119   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2120 
2121   B->ops->destroy = MatDestroy_MPISBAIJ;
2122   B->ops->view    = MatView_MPISBAIJ;
2123   B->assembled    = PETSC_FALSE;
2124   B->insertmode   = NOT_SET_VALUES;
2125 
2126   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
2127   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2128 
2129   /* build local table of row and column ownerships */
2130   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2131 
2132   /* build cache for off array entries formed */
2133   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2134 
2135   b->donotstash  = PETSC_FALSE;
2136   b->colmap      = NULL;
2137   b->garray      = NULL;
2138   b->roworiented = PETSC_TRUE;
2139 
2140   /* stuff used in block assembly */
2141   b->barray = NULL;
2142 
2143   /* stuff used for matrix vector multiply */
2144   b->lvec    = NULL;
2145   b->Mvctx   = NULL;
2146   b->slvec0  = NULL;
2147   b->slvec0b = NULL;
2148   b->slvec1  = NULL;
2149   b->slvec1a = NULL;
2150   b->slvec1b = NULL;
2151   b->sMvctx  = NULL;
2152 
2153   /* stuff for MatGetRow() */
2154   b->rowindices   = NULL;
2155   b->rowvalues    = NULL;
2156   b->getrowactive = PETSC_FALSE;
2157 
2158   /* hash table stuff */
2159   b->ht           = NULL;
2160   b->hd           = NULL;
2161   b->ht_size      = 0;
2162   b->ht_flag      = PETSC_FALSE;
2163   b->ht_fact      = 0;
2164   b->ht_total_ct  = 0;
2165   b->ht_insert_ct = 0;
2166 
2167   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2168   b->ijonly = PETSC_FALSE;
2169 
2170   b->in_loc = NULL;
2171   b->v_loc  = NULL;
2172   b->n_loc  = 0;
2173 
2174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2175   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2176   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2178 #if defined(PETSC_HAVE_ELEMENTAL)
2179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2180 #endif
2181 #if defined(PETSC_HAVE_SCALAPACK)
2182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
2183 #endif
2184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2186 
2187   B->symmetric                  = PETSC_TRUE;
2188   B->structurally_symmetric     = PETSC_TRUE;
2189   B->symmetric_set              = PETSC_TRUE;
2190   B->structurally_symmetric_set = PETSC_TRUE;
2191   B->symmetric_eternal          = PETSC_TRUE;
2192 #if defined(PETSC_USE_COMPLEX)
2193   B->hermitian                  = PETSC_FALSE;
2194   B->hermitian_set              = PETSC_FALSE;
2195 #else
2196   B->hermitian                  = PETSC_TRUE;
2197   B->hermitian_set              = PETSC_TRUE;
2198 #endif
2199 
2200   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2201   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2202   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2203   if (flg) {
2204     PetscReal fact = 1.39;
2205     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2206     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2207     if (fact <= 1.0) fact = 1.39;
2208     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2209     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2210   }
2211   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 /*MC
2216    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2217 
2218    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2219    and MATMPISBAIJ otherwise.
2220 
2221    Options Database Keys:
2222 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2223 
2224   Level: beginner
2225 
2226 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2227 M*/
2228 
2229 /*@C
2230    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2231    the user should preallocate the matrix storage by setting the parameters
2232    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2233    performance can be increased by more than a factor of 50.
2234 
2235    Collective on Mat
2236 
2237    Input Parameters:
2238 +  B - the matrix
2239 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2240           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2241 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2242            submatrix  (same for all local rows)
2243 .  d_nnz - array containing the number of block nonzeros in the various block rows
2244            in the upper triangular and diagonal part of the in diagonal portion of the local
2245            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2246            for the diagonal entry and set a value even if it is zero.
2247 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2248            submatrix (same for all local rows).
2249 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2250            off-diagonal portion of the local submatrix that is right of the diagonal
2251            (possibly different for each block row) or NULL.
2252 
2253 
2254    Options Database Keys:
2255 +   -mat_no_unroll - uses code that does not unroll the loops in the
2256                      block calculations (much slower)
2257 -   -mat_block_size - size of the blocks to use
2258 
2259    Notes:
2260 
2261    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2262    than it must be used on all processors that share the object for that argument.
2263 
2264    If the *_nnz parameter is given then the *_nz parameter is ignored
2265 
2266    Storage Information:
2267    For a square global matrix we define each processor's diagonal portion
2268    to be its local rows and the corresponding columns (a square submatrix);
2269    each processor's off-diagonal portion encompasses the remainder of the
2270    local matrix (a rectangular submatrix).
2271 
2272    The user can specify preallocated storage for the diagonal part of
2273    the local submatrix with either d_nz or d_nnz (not both).  Set
2274    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2275    memory allocation.  Likewise, specify preallocated storage for the
2276    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2277 
2278    You can call MatGetInfo() to get information on how effective the preallocation was;
2279    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2280    You can also run with the option -info and look for messages with the string
2281    malloc in them to see if additional memory allocation was needed.
2282 
2283    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2284    the figure below we depict these three local rows and all columns (0-11).
2285 
2286 .vb
2287            0 1 2 3 4 5 6 7 8 9 10 11
2288           --------------------------
2289    row 3  |. . . d d d o o o o  o  o
2290    row 4  |. . . d d d o o o o  o  o
2291    row 5  |. . . d d d o o o o  o  o
2292           --------------------------
2293 .ve
2294 
2295    Thus, any entries in the d locations are stored in the d (diagonal)
2296    submatrix, and any entries in the o locations are stored in the
2297    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2298    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2299 
2300    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2301    plus the diagonal part of the d matrix,
2302    and o_nz should indicate the number of block nonzeros per row in the o matrix
2303 
2304    In general, for PDE problems in which most nonzeros are near the diagonal,
2305    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2306    or you will get TERRIBLE performance; see the users' manual chapter on
2307    matrices.
2308 
2309    Level: intermediate
2310 
2311 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2312 @*/
2313 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2314 {
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2319   PetscValidType(B,1);
2320   PetscValidLogicalCollectiveInt(B,bs,2);
2321   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 /*@C
2326    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2327    (block compressed row).  For good matrix assembly performance
2328    the user should preallocate the matrix storage by setting the parameters
2329    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2330    performance can be increased by more than a factor of 50.
2331 
2332    Collective
2333 
2334    Input Parameters:
2335 +  comm - MPI communicator
2336 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2337           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2338 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2339            This value should be the same as the local size used in creating the
2340            y vector for the matrix-vector product y = Ax.
2341 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2342            This value should be the same as the local size used in creating the
2343            x vector for the matrix-vector product y = Ax.
2344 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2345 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2346 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2347            submatrix  (same for all local rows)
2348 .  d_nnz - array containing the number of block nonzeros in the various block rows
2349            in the upper triangular portion of the in diagonal portion of the local
2350            (possibly different for each block block row) or NULL.
2351            If you plan to factor the matrix you must leave room for the diagonal entry and
2352            set its value even if it is zero.
2353 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2354            submatrix (same for all local rows).
2355 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2356            off-diagonal portion of the local submatrix (possibly different for
2357            each block row) or NULL.
2358 
2359    Output Parameter:
2360 .  A - the matrix
2361 
2362    Options Database Keys:
2363 +   -mat_no_unroll - uses code that does not unroll the loops in the
2364                      block calculations (much slower)
2365 .   -mat_block_size - size of the blocks to use
2366 -   -mat_mpi - use the parallel matrix data structures even on one processor
2367                (defaults to using SeqBAIJ format on one processor)
2368 
2369    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2370    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2371    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2372 
2373    Notes:
2374    The number of rows and columns must be divisible by blocksize.
2375    This matrix type does not support complex Hermitian operation.
2376 
2377    The user MUST specify either the local or global matrix dimensions
2378    (possibly both).
2379 
2380    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2381    than it must be used on all processors that share the object for that argument.
2382 
2383    If the *_nnz parameter is given then the *_nz parameter is ignored
2384 
2385    Storage Information:
2386    For a square global matrix we define each processor's diagonal portion
2387    to be its local rows and the corresponding columns (a square submatrix);
2388    each processor's off-diagonal portion encompasses the remainder of the
2389    local matrix (a rectangular submatrix).
2390 
2391    The user can specify preallocated storage for the diagonal part of
2392    the local submatrix with either d_nz or d_nnz (not both).  Set
2393    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2394    memory allocation.  Likewise, specify preallocated storage for the
2395    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2396 
2397    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2398    the figure below we depict these three local rows and all columns (0-11).
2399 
2400 .vb
2401            0 1 2 3 4 5 6 7 8 9 10 11
2402           --------------------------
2403    row 3  |. . . d d d o o o o  o  o
2404    row 4  |. . . d d d o o o o  o  o
2405    row 5  |. . . d d d o o o o  o  o
2406           --------------------------
2407 .ve
2408 
2409    Thus, any entries in the d locations are stored in the d (diagonal)
2410    submatrix, and any entries in the o locations are stored in the
2411    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2412    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2413 
2414    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2415    plus the diagonal part of the d matrix,
2416    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2417    In general, for PDE problems in which most nonzeros are near the diagonal,
2418    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2419    or you will get TERRIBLE performance; see the users' manual chapter on
2420    matrices.
2421 
2422    Level: intermediate
2423 
2424 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2425 @*/
2426 
2427 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2428 {
2429   PetscErrorCode ierr;
2430   PetscMPIInt    size;
2431 
2432   PetscFunctionBegin;
2433   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2434   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2435   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2436   if (size > 1) {
2437     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2438     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2439   } else {
2440     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2441     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2442   }
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 
2447 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2448 {
2449   Mat            mat;
2450   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2451   PetscErrorCode ierr;
2452   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2453   PetscScalar    *array;
2454 
2455   PetscFunctionBegin;
2456   *newmat = NULL;
2457 
2458   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2459   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2460   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2461   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2462   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2463 
2464   mat->factortype   = matin->factortype;
2465   mat->preallocated = PETSC_TRUE;
2466   mat->assembled    = PETSC_TRUE;
2467   mat->insertmode   = NOT_SET_VALUES;
2468 
2469   a      = (Mat_MPISBAIJ*)mat->data;
2470   a->bs2 = oldmat->bs2;
2471   a->mbs = oldmat->mbs;
2472   a->nbs = oldmat->nbs;
2473   a->Mbs = oldmat->Mbs;
2474   a->Nbs = oldmat->Nbs;
2475 
2476   a->size         = oldmat->size;
2477   a->rank         = oldmat->rank;
2478   a->donotstash   = oldmat->donotstash;
2479   a->roworiented  = oldmat->roworiented;
2480   a->rowindices   = NULL;
2481   a->rowvalues    = NULL;
2482   a->getrowactive = PETSC_FALSE;
2483   a->barray       = NULL;
2484   a->rstartbs     = oldmat->rstartbs;
2485   a->rendbs       = oldmat->rendbs;
2486   a->cstartbs     = oldmat->cstartbs;
2487   a->cendbs       = oldmat->cendbs;
2488 
2489   /* hash table stuff */
2490   a->ht           = NULL;
2491   a->hd           = NULL;
2492   a->ht_size      = 0;
2493   a->ht_flag      = oldmat->ht_flag;
2494   a->ht_fact      = oldmat->ht_fact;
2495   a->ht_total_ct  = 0;
2496   a->ht_insert_ct = 0;
2497 
2498   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2499   if (oldmat->colmap) {
2500 #if defined(PETSC_USE_CTABLE)
2501     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2502 #else
2503     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2504     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2505     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2506 #endif
2507   } else a->colmap = NULL;
2508 
2509   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2510     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2511     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2512     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2513   } else a->garray = NULL;
2514 
2515   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2516   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2517   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2518   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2519   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2520 
2521   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2522   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2523   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2524   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2525 
2526   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2527   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2528   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2529   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2530   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2531   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2532   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2533   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2534   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2535   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2536   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2537   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2538   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2539 
2540   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2541   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2542   a->sMvctx = oldmat->sMvctx;
2543   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2544 
2545   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2546   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2547   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2548   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2549   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2550   *newmat = mat;
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2555 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2556 
2557 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2558 {
2559   PetscErrorCode ierr;
2560   PetscBool      isbinary;
2561 
2562   PetscFunctionBegin;
2563   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2564   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2565   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 /*XXXXX@
2570    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2571 
2572    Input Parameters:
2573 .  mat  - the matrix
2574 .  fact - factor
2575 
2576    Not Collective on Mat, each process can have a different hash factor
2577 
2578    Level: advanced
2579 
2580   Notes:
2581    This can also be set by the command line option: -mat_use_hash_table fact
2582 
2583 .seealso: MatSetOption()
2584 @XXXXX*/
2585 
2586 
2587 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2588 {
2589   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2590   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2591   PetscReal      atmp;
2592   PetscReal      *work,*svalues,*rvalues;
2593   PetscErrorCode ierr;
2594   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2595   PetscMPIInt    rank,size;
2596   PetscInt       *rowners_bs,dest,count,source;
2597   PetscScalar    *va;
2598   MatScalar      *ba;
2599   MPI_Status     stat;
2600 
2601   PetscFunctionBegin;
2602   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2603   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2604   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2605 
2606   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
2607   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
2608 
2609   bs  = A->rmap->bs;
2610   mbs = a->mbs;
2611   Mbs = a->Mbs;
2612   ba  = b->a;
2613   bi  = b->i;
2614   bj  = b->j;
2615 
2616   /* find ownerships */
2617   rowners_bs = A->rmap->range;
2618 
2619   /* each proc creates an array to be distributed */
2620   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2621 
2622   /* row_max for B */
2623   if (rank != size-1) {
2624     for (i=0; i<mbs; i++) {
2625       ncols = bi[1] - bi[0]; bi++;
2626       brow  = bs*i;
2627       for (j=0; j<ncols; j++) {
2628         bcol = bs*(*bj);
2629         for (kcol=0; kcol<bs; kcol++) {
2630           col  = bcol + kcol;                /* local col index */
2631           col += rowners_bs[rank+1];      /* global col index */
2632           for (krow=0; krow<bs; krow++) {
2633             atmp = PetscAbsScalar(*ba); ba++;
2634             row  = brow + krow;   /* local row index */
2635             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2636             if (work[col] < atmp) work[col] = atmp;
2637           }
2638         }
2639         bj++;
2640       }
2641     }
2642 
2643     /* send values to its owners */
2644     for (dest=rank+1; dest<size; dest++) {
2645       svalues = work + rowners_bs[dest];
2646       count   = rowners_bs[dest+1]-rowners_bs[dest];
2647       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2648     }
2649   }
2650 
2651   /* receive values */
2652   if (rank) {
2653     rvalues = work;
2654     count   = rowners_bs[rank+1]-rowners_bs[rank];
2655     for (source=0; source<rank; source++) {
2656       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRMPI(ierr);
2657       /* process values */
2658       for (i=0; i<count; i++) {
2659         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2660       }
2661     }
2662   }
2663 
2664   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2665   ierr = PetscFree(work);CHKERRQ(ierr);
2666   PetscFunctionReturn(0);
2667 }
2668 
2669 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2670 {
2671   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2672   PetscErrorCode    ierr;
2673   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2674   PetscScalar       *x,*ptr,*from;
2675   Vec               bb1;
2676   const PetscScalar *b;
2677 
2678   PetscFunctionBegin;
2679   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2680   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2681 
2682   if (flag == SOR_APPLY_UPPER) {
2683     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2684     PetscFunctionReturn(0);
2685   }
2686 
2687   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2688     if (flag & SOR_ZERO_INITIAL_GUESS) {
2689       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2690       its--;
2691     }
2692 
2693     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2694     while (its--) {
2695 
2696       /* lower triangular part: slvec0b = - B^T*xx */
2697       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2698 
2699       /* copy xx into slvec0a */
2700       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2701       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2702       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
2703       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2704 
2705       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2706 
2707       /* copy bb into slvec1a */
2708       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2709       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2710       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
2711       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2712 
2713       /* set slvec1b = 0 */
2714       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2715 
2716       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2718       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* upper triangular part: bb1 = bb1 - B*x */
2722       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2723 
2724       /* local diagonal sweep */
2725       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2726     }
2727     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2728   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2729     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2730   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2731     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2732   } else if (flag & SOR_EISENSTAT) {
2733     Vec               xx1;
2734     PetscBool         hasop;
2735     const PetscScalar *diag;
2736     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2737     PetscInt          i,n;
2738 
2739     if (!mat->xx1) {
2740       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2741       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2742     }
2743     xx1 = mat->xx1;
2744     bb1 = mat->bb1;
2745 
2746     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2747 
2748     if (!mat->diag) {
2749       /* this is wrong for same matrix with new nonzero values */
2750       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2751       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2752     }
2753     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2754 
2755     if (hasop) {
2756       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2757       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2758     } else {
2759       /*
2760           These two lines are replaced by code that may be a bit faster for a good compiler
2761       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2762       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2763       */
2764       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2765       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2766       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2767       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2768       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2769       if (omega == 1.0) {
2770         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2771         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2772       } else {
2773         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2774         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2775       }
2776       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2777       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2778       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2779       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2780     }
2781 
2782     /* multiply off-diagonal portion of matrix */
2783     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2784     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2785     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2786     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2787     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
2788     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2789     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2790     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2791     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2792     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2793 
2794     /* local sweep */
2795     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2796     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2797   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 /*@
2802      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2803          CSR format the local rows.
2804 
2805    Collective
2806 
2807    Input Parameters:
2808 +  comm - MPI communicator
2809 .  bs - the block size, only a block size of 1 is supported
2810 .  m - number of local rows (Cannot be PETSC_DECIDE)
2811 .  n - This value should be the same as the local size used in creating the
2812        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2813        calculated if N is given) For square matrices n is almost always m.
2814 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2815 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2816 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2817 .   j - column indices
2818 -   a - matrix values
2819 
2820    Output Parameter:
2821 .   mat - the matrix
2822 
2823    Level: intermediate
2824 
2825    Notes:
2826        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2827      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2828      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2829 
2830        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2831 
2832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2833           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2834 @*/
2835 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2836 {
2837   PetscErrorCode ierr;
2838 
2839 
2840   PetscFunctionBegin;
2841   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2842   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2843   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2844   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2845   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2846   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 
2851 /*@C
2852    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2853 
2854    Collective
2855 
2856    Input Parameters:
2857 +  B - the matrix
2858 .  bs - the block size
2859 .  i - the indices into j for the start of each local row (starts with zero)
2860 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2861 -  v - optional values in the matrix
2862 
2863    Level: advanced
2864 
2865    Notes:
2866    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2867    and usually the numerical values as well
2868 
2869    Any entries below the diagonal are ignored
2870 
2871 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2872 @*/
2873 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2874 {
2875   PetscErrorCode ierr;
2876 
2877   PetscFunctionBegin;
2878   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2883 {
2884   PetscErrorCode ierr;
2885   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2886   PetscInt       *indx;
2887   PetscScalar    *values;
2888 
2889   PetscFunctionBegin;
2890   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2891   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2892     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2893     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2894     PetscInt       *bindx,rmax=a->rmax,j;
2895     PetscMPIInt    rank,size;
2896 
2897     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2898     mbs = m/bs; Nbs = N/cbs;
2899     if (n == PETSC_DECIDE) {
2900       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2901     }
2902     nbs = n/cbs;
2903 
2904     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2905     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2906 
2907     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2908     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
2909     if (rank == size-1) {
2910       /* Check sum(nbs) = Nbs */
2911       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2912     }
2913 
2914     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2915     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2916     for (i=0; i<mbs; i++) {
2917       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2918       nnz  = nnz/bs;
2919       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2920       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2921       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2922     }
2923     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2924     ierr = PetscFree(bindx);CHKERRQ(ierr);
2925 
2926     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2927     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2928     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2929     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
2930     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
2931     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2932     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2933   }
2934 
2935   /* numeric phase */
2936   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2937   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2938 
2939   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2940   for (i=0; i<m; i++) {
2941     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2942     Ii   = i + rstart;
2943     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2944     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2945   }
2946   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2947   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2948   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951