xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fa9d2cbc76144de3c0f6596cc61c8f12fb157083)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE,isdense;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
78     if (!isdense) {
79       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
80       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
81       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
82     } else {
83       ierr = MatSetUp(B);CHKERRQ(ierr);
84     }
85   } else {
86     B    = *newmat;
87     ierr = MatZeroEntries(B);CHKERRQ(ierr);
88   }
89 
90   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
91   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
92     PetscInt          ncols;
93     const PetscInt    *row;
94     const PetscScalar *vals;
95 
96     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
97     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
98 #if defined(PETSC_USE_COMPLEX)
99     if (A->hermitian) {
100       PetscInt i;
101       for (i = 0; i < ncols; i++) {
102         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
103       }
104     } else {
105       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
106     }
107 #else
108     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109 #endif
110     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
111   }
112   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
113   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
114   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
115 
116   if (reuse == MAT_INPLACE_MATRIX) {
117     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
118   } else {
119     *newmat = B;
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
125 {
126   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
131   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
142   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
147   { \
148     brow = row/bs;  \
149     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150     rmax = aimax[brow]; nrow = ailen[brow]; \
151     bcol = col/bs; \
152     ridx = row % bs; cidx = col % bs; \
153     low  = 0; high = nrow; \
154     while (high-low > 3) { \
155       t = (low+high)/2; \
156       if (rp[t] > bcol) high = t; \
157       else              low  = t; \
158     } \
159     for (_i=low; _i<high; _i++) { \
160       if (rp[_i] > bcol) break; \
161       if (rp[_i] == bcol) { \
162         bap = ap + bs2*_i + bs*cidx + ridx; \
163         if (addv == ADD_VALUES) *bap += value;  \
164         else                    *bap  = value;  \
165         goto a_noinsert; \
166       } \
167     } \
168     if (a->nonew == 1) goto a_noinsert; \
169     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
170     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
171     N = nrow++ - 1;  \
172     /* shift up all the later entries in this row */ \
173     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
174     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
175     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
176     rp[_i]                      = bcol;  \
177     ap[bs2*_i + bs*cidx + ridx] = value;  \
178     A->nonzerostate++;\
179 a_noinsert:; \
180     ailen[brow] = nrow; \
181   }
182 
183 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
184   { \
185     brow = row/bs;  \
186     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
187     rmax = bimax[brow]; nrow = bilen[brow]; \
188     bcol = col/bs; \
189     ridx = row % bs; cidx = col % bs; \
190     low  = 0; high = nrow; \
191     while (high-low > 3) { \
192       t = (low+high)/2; \
193       if (rp[t] > bcol) high = t; \
194       else              low  = t; \
195     } \
196     for (_i=low; _i<high; _i++) { \
197       if (rp[_i] > bcol) break; \
198       if (rp[_i] == bcol) { \
199         bap = ap + bs2*_i + bs*cidx + ridx; \
200         if (addv == ADD_VALUES) *bap += value;  \
201         else                    *bap  = value;  \
202         goto b_noinsert; \
203       } \
204     } \
205     if (b->nonew == 1) goto b_noinsert; \
206     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
207     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
208     N = nrow++ - 1;  \
209     /* shift up all the later entries in this row */ \
210     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
211     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
212     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
213     rp[_i]                      = bcol;  \
214     ap[bs2*_i + bs*cidx + ridx] = value;  \
215     B->nonzerostate++;\
216 b_noinsert:; \
217     bilen[brow] = nrow; \
218   }
219 
220 /* Only add/insert a(i,j) with i<=j (blocks).
221    Any a(i,j) with i>j input by user is ingored or generates an error
222 */
223 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
224 {
225   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
226   MatScalar      value;
227   PetscBool      roworiented = baij->roworiented;
228   PetscErrorCode ierr;
229   PetscInt       i,j,row,col;
230   PetscInt       rstart_orig=mat->rmap->rstart;
231   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
232   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
233 
234   /* Some Variables required in the macro */
235   Mat          A     = baij->A;
236   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
237   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
238   MatScalar    *aa   =a->a;
239 
240   Mat         B     = baij->B;
241   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
242   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
243   MatScalar   *ba   =b->a;
244 
245   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
246   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
247   MatScalar *ap,*bap;
248 
249   /* for stash */
250   PetscInt  n_loc, *in_loc = NULL;
251   MatScalar *v_loc = NULL;
252 
253   PetscFunctionBegin;
254   if (!baij->donotstash) {
255     if (n > baij->n_loc) {
256       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
257       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
258       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
259       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
260 
261       baij->n_loc = n;
262     }
263     in_loc = baij->in_loc;
264     v_loc  = baij->v_loc;
265   }
266 
267   for (i=0; i<m; i++) {
268     if (im[i] < 0) continue;
269 #if defined(PETSC_USE_DEBUG)
270     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
271 #endif
272     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273       row = im[i] - rstart_orig;              /* local row index */
274       for (j=0; j<n; j++) {
275         if (im[i]/bs > in[j]/bs) {
276           if (a->ignore_ltriangular) {
277             continue;    /* ignore lower triangular blocks */
278           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
279         }
280         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
281           col  = in[j] - cstart_orig;         /* local col index */
282           brow = row/bs; bcol = col/bs;
283           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
284           if (roworiented) value = v[i*n+j];
285           else             value = v[i+j*m];
286           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
287           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
288         } else if (in[j] < 0) continue;
289 #if defined(PETSC_USE_DEBUG)
290         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291 #endif
292         else {  /* off-diag entry (B) */
293           if (mat->was_assembled) {
294             if (!baij->colmap) {
295               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
296             }
297 #if defined(PETSC_USE_CTABLE)
298             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
299             col  = col - 1;
300 #else
301             col = baij->colmap[in[j]/bs] - 1;
302 #endif
303             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
304               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
305               col  =  in[j];
306               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
307               B    = baij->B;
308               b    = (Mat_SeqBAIJ*)(B)->data;
309               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
310               ba   = b->a;
311             } else col += in[j]%bs;
312           } else col = in[j];
313           if (roworiented) value = v[i*n+j];
314           else             value = v[i+j*m];
315           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
316           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
317         }
318       }
319     } else {  /* off processor entry */
320       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
321       if (!baij->donotstash) {
322         mat->assembled = PETSC_FALSE;
323         n_loc          = 0;
324         for (j=0; j<n; j++) {
325           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
326           in_loc[n_loc] = in[j];
327           if (roworiented) {
328             v_loc[n_loc] = v[i*n+j];
329           } else {
330             v_loc[n_loc] = v[j*m+i];
331           }
332           n_loc++;
333         }
334         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
335       }
336     }
337   }
338   PetscFunctionReturn(0);
339 }
340 
341 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
342 {
343   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
344   PetscErrorCode    ierr;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
348   PetscBool         roworiented=a->roworiented;
349   const PetscScalar *value     = v;
350   MatScalar         *ap,*aa = a->a,*bap;
351 
352   PetscFunctionBegin;
353   if (col < row) {
354     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
355     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
356   }
357   rp   = aj + ai[row];
358   ap   = aa + bs2*ai[row];
359   rmax = imax[row];
360   nrow = ailen[row];
361   value = v;
362   low   = 0;
363   high  = nrow;
364 
365   while (high-low > 7) {
366     t = (low+high)/2;
367     if (rp[t] > col) high = t;
368     else             low  = t;
369   }
370   for (i=low; i<high; i++) {
371     if (rp[i] > col) break;
372     if (rp[i] == col) {
373       bap = ap +  bs2*i;
374       if (roworiented) {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++) {
377             for (jj=ii; jj<bs2; jj+=bs) {
378               bap[jj] += *value++;
379             }
380           }
381         } else {
382           for (ii=0; ii<bs; ii++) {
383             for (jj=ii; jj<bs2; jj+=bs) {
384               bap[jj] = *value++;
385             }
386           }
387         }
388       } else {
389         if (is == ADD_VALUES) {
390           for (ii=0; ii<bs; ii++) {
391             for (jj=0; jj<bs; jj++) {
392               *bap++ += *value++;
393             }
394           }
395         } else {
396           for (ii=0; ii<bs; ii++) {
397             for (jj=0; jj<bs; jj++) {
398               *bap++  = *value++;
399             }
400           }
401         }
402       }
403       goto noinsert2;
404     }
405   }
406   if (nonew == 1) goto noinsert2;
407   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
408   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
409   N = nrow++ - 1; high++;
410   /* shift up all the later entries in this row */
411   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
412   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
413   rp[i] = col;
414   bap   = ap +  bs2*i;
415   if (roworiented) {
416     for (ii=0; ii<bs; ii++) {
417       for (jj=ii; jj<bs2; jj+=bs) {
418         bap[jj] = *value++;
419       }
420     }
421   } else {
422     for (ii=0; ii<bs; ii++) {
423       for (jj=0; jj<bs; jj++) {
424         *bap++ = *value++;
425       }
426     }
427   }
428   noinsert2:;
429   ailen[row] = nrow;
430   PetscFunctionReturn(0);
431 }
432 
433 /*
434    This routine is exactly duplicated in mpibaij.c
435 */
436 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
437 {
438   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
439   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
440   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
441   PetscErrorCode    ierr;
442   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
443   PetscBool         roworiented=a->roworiented;
444   const PetscScalar *value     = v;
445   MatScalar         *ap,*aa = a->a,*bap;
446 
447   PetscFunctionBegin;
448   rp   = aj + ai[row];
449   ap   = aa + bs2*ai[row];
450   rmax = imax[row];
451   nrow = ailen[row];
452   low  = 0;
453   high = nrow;
454   value = v;
455   while (high-low > 7) {
456     t = (low+high)/2;
457     if (rp[t] > col) high = t;
458     else             low  = t;
459   }
460   for (i=low; i<high; i++) {
461     if (rp[i] > col) break;
462     if (rp[i] == col) {
463       bap = ap +  bs2*i;
464       if (roworiented) {
465         if (is == ADD_VALUES) {
466           for (ii=0; ii<bs; ii++) {
467             for (jj=ii; jj<bs2; jj+=bs) {
468               bap[jj] += *value++;
469             }
470           }
471         } else {
472           for (ii=0; ii<bs; ii++) {
473             for (jj=ii; jj<bs2; jj+=bs) {
474               bap[jj] = *value++;
475             }
476           }
477         }
478       } else {
479         if (is == ADD_VALUES) {
480           for (ii=0; ii<bs; ii++,value+=bs) {
481             for (jj=0; jj<bs; jj++) {
482               bap[jj] += value[jj];
483             }
484             bap += bs;
485           }
486         } else {
487           for (ii=0; ii<bs; ii++,value+=bs) {
488             for (jj=0; jj<bs; jj++) {
489               bap[jj]  = value[jj];
490             }
491             bap += bs;
492           }
493         }
494       }
495       goto noinsert2;
496     }
497   }
498   if (nonew == 1) goto noinsert2;
499   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
500   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
501   N = nrow++ - 1; high++;
502   /* shift up all the later entries in this row */
503   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
504   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
505   rp[i] = col;
506   bap   = ap +  bs2*i;
507   if (roworiented) {
508     for (ii=0; ii<bs; ii++) {
509       for (jj=ii; jj<bs2; jj+=bs) {
510         bap[jj] = *value++;
511       }
512     }
513   } else {
514     for (ii=0; ii<bs; ii++) {
515       for (jj=0; jj<bs; jj++) {
516         *bap++ = *value++;
517       }
518     }
519   }
520   noinsert2:;
521   ailen[row] = nrow;
522   PetscFunctionReturn(0);
523 }
524 
525 /*
526     This routine could be optimized by removing the need for the block copy below and passing stride information
527   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
528 */
529 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
530 {
531   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
532   const MatScalar *value;
533   MatScalar       *barray     =baij->barray;
534   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
535   PetscErrorCode  ierr;
536   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
537   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
538   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
539 
540   PetscFunctionBegin;
541   if (!barray) {
542     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
543     baij->barray = barray;
544   }
545 
546   if (roworiented) {
547     stepval = (n-1)*bs;
548   } else {
549     stepval = (m-1)*bs;
550   }
551   for (i=0; i<m; i++) {
552     if (im[i] < 0) continue;
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
555 #endif
556     if (im[i] >= rstart && im[i] < rend) {
557       row = im[i] - rstart;
558       for (j=0; j<n; j++) {
559         if (im[i] > in[j]) {
560           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
561           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
562         }
563         /* If NumCol = 1 then a copy is not required */
564         if ((roworiented) && (n == 1)) {
565           barray = (MatScalar*) v + i*bs2;
566         } else if ((!roworiented) && (m == 1)) {
567           barray = (MatScalar*) v + j*bs2;
568         } else { /* Here a copy is required */
569           if (roworiented) {
570             value = v + i*(stepval+bs)*bs + j*bs;
571           } else {
572             value = v + j*(stepval+bs)*bs + i*bs;
573           }
574           for (ii=0; ii<bs; ii++,value+=stepval) {
575             for (jj=0; jj<bs; jj++) {
576               *barray++ = *value++;
577             }
578           }
579           barray -=bs2;
580         }
581 
582         if (in[j] >= cstart && in[j] < cend) {
583           col  = in[j] - cstart;
584           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
585         } else if (in[j] < 0) continue;
586 #if defined(PETSC_USE_DEBUG)
587         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
588 #endif
589         else {
590           if (mat->was_assembled) {
591             if (!baij->colmap) {
592               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
593             }
594 
595 #if defined(PETSC_USE_DEBUG)
596 #if defined(PETSC_USE_CTABLE)
597             { PetscInt data;
598               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
599               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
600             }
601 #else
602             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
603 #endif
604 #endif
605 #if defined(PETSC_USE_CTABLE)
606             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
607             col  = (col - 1)/bs;
608 #else
609             col = (baij->colmap[in[j]] - 1)/bs;
610 #endif
611             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
612               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
613               col  = in[j];
614             }
615           } else col = in[j];
616           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!baij->donotstash) {
622         if (roworiented) {
623           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
624         } else {
625           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
626         }
627       }
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
634 {
635   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
636   PetscErrorCode ierr;
637   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
638   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
639 
640   PetscFunctionBegin;
641   for (i=0; i<m; i++) {
642     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
643     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
644     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
645       row = idxm[i] - bsrstart;
646       for (j=0; j<n; j++) {
647         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
648         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
649         if (idxn[j] >= bscstart && idxn[j] < bscend) {
650           col  = idxn[j] - bscstart;
651           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
652         } else {
653           if (!baij->colmap) {
654             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
655           }
656 #if defined(PETSC_USE_CTABLE)
657           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
658           data--;
659 #else
660           data = baij->colmap[idxn[j]/bs]-1;
661 #endif
662           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
663           else {
664             col  = data + idxn[j]%bs;
665             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
666           }
667         }
668       }
669     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
670   }
671   PetscFunctionReturn(0);
672 }
673 
674 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
675 {
676   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode ierr;
678   PetscReal      sum[2],*lnorm2;
679 
680   PetscFunctionBegin;
681   if (baij->size == 1) {
682     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
683   } else {
684     if (type == NORM_FROBENIUS) {
685       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
686       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
687       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
688       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
689       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
690       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
691       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
692       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
693     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
694       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
695       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
696       PetscReal    *rsum,*rsum2,vabs;
697       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
698       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
699       MatScalar    *v;
700 
701       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
702       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
703       /* Amat */
704       v = amat->a; jj = amat->j;
705       for (brow=0; brow<mbs; brow++) {
706         grow = bs*(rstart + brow);
707         nz   = amat->i[brow+1] - amat->i[brow];
708         for (bcol=0; bcol<nz; bcol++) {
709           gcol = bs*(rstart + *jj); jj++;
710           for (col=0; col<bs; col++) {
711             for (row=0; row<bs; row++) {
712               vabs            = PetscAbsScalar(*v); v++;
713               rsum[gcol+col] += vabs;
714               /* non-diagonal block */
715               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
716             }
717           }
718         }
719         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
720       }
721       /* Bmat */
722       v = bmat->a; jj = bmat->j;
723       for (brow=0; brow<mbs; brow++) {
724         grow = bs*(rstart + brow);
725         nz = bmat->i[brow+1] - bmat->i[brow];
726         for (bcol=0; bcol<nz; bcol++) {
727           gcol = bs*garray[*jj]; jj++;
728           for (col=0; col<bs; col++) {
729             for (row=0; row<bs; row++) {
730               vabs            = PetscAbsScalar(*v); v++;
731               rsum[gcol+col] += vabs;
732               rsum[grow+row] += vabs;
733             }
734           }
735         }
736         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
737       }
738       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
739       *norm = 0.0;
740       for (col=0; col<mat->cmap->N; col++) {
741         if (rsum2[col] > *norm) *norm = rsum2[col];
742       }
743       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
744     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
745   }
746   PetscFunctionReturn(0);
747 }
748 
749 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
750 {
751   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
752   PetscErrorCode ierr;
753   PetscInt       nstash,reallocs;
754 
755   PetscFunctionBegin;
756   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
757 
758   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
759   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
760   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
761   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
762   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
763   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
768 {
769   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
770   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
771   PetscErrorCode ierr;
772   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
773   PetscInt       *row,*col;
774   PetscBool      other_disassembled;
775   PetscMPIInt    n;
776   PetscBool      r1,r2,r3;
777   MatScalar      *val;
778 
779   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
780   PetscFunctionBegin;
781   if (!baij->donotstash &&  !mat->nooffprocentries) {
782     while (1) {
783       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
784       if (!flg) break;
785 
786       for (i=0; i<n;) {
787         /* Now identify the consecutive vals belonging to the same row */
788         for (j=i,rstart=row[j]; j<n; j++) {
789           if (row[j] != rstart) break;
790         }
791         if (j < n) ncols = j-i;
792         else       ncols = n-i;
793         /* Now assemble all these values with a single function call */
794         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
795         i    = j;
796       }
797     }
798     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
799     /* Now process the block-stash. Since the values are stashed column-oriented,
800        set the roworiented flag to column oriented, and after MatSetValues()
801        restore the original flags */
802     r1 = baij->roworiented;
803     r2 = a->roworiented;
804     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
805 
806     baij->roworiented = PETSC_FALSE;
807     a->roworiented    = PETSC_FALSE;
808 
809     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
810     while (1) {
811       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
812       if (!flg) break;
813 
814       for (i=0; i<n;) {
815         /* Now identify the consecutive vals belonging to the same row */
816         for (j=i,rstart=row[j]; j<n; j++) {
817           if (row[j] != rstart) break;
818         }
819         if (j < n) ncols = j-i;
820         else       ncols = n-i;
821         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
822         i    = j;
823       }
824     }
825     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
826 
827     baij->roworiented = r1;
828     a->roworiented    = r2;
829 
830     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
831   }
832 
833   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
835 
836   /* determine if any processor has disassembled, if so we must
837      also disassemble ourselfs, in order that we may reassemble. */
838   /*
839      if nonzero structure of submatrix B cannot change then we know that
840      no processor disassembled thus we can skip this stuff
841   */
842   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
843     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
844     if (mat->was_assembled && !other_disassembled) {
845       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
846     }
847   }
848 
849   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
850     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
851   }
852   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
853   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
854 
855   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
856 
857   baij->rowvalues = 0;
858 
859   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
860   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
861     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
862     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
868 #include <petscdraw.h>
869 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
872   PetscErrorCode    ierr;
873   PetscInt          bs   = mat->rmap->bs;
874   PetscMPIInt       rank = baij->rank;
875   PetscBool         iascii,isdraw;
876   PetscViewer       sviewer;
877   PetscViewerFormat format;
878 
879   PetscFunctionBegin;
880   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
881   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
882   if (iascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo info;
886       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
889       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
890       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
891       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
892       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
893       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
894       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
895       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
896       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
897       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
898       PetscFunctionReturn(0);
899     } else if (format == PETSC_VIEWER_ASCII_INFO) {
900       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
901       PetscFunctionReturn(0);
902     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
903       PetscFunctionReturn(0);
904     }
905   }
906 
907   if (isdraw) {
908     PetscDraw draw;
909     PetscBool isnull;
910     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
911     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
912     if (isnull) PetscFunctionReturn(0);
913   }
914 
915   {
916     /* assemble the entire matrix onto first processor. */
917     Mat          A;
918     Mat_SeqSBAIJ *Aloc;
919     Mat_SeqBAIJ  *Bloc;
920     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
921     MatScalar    *a;
922     const char   *matname;
923 
924     /* Should this be the same type as mat? */
925     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
926     if (!rank) {
927       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
928     } else {
929       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
930     }
931     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
932     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
933     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
934     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
935 
936     /* copy over the A part */
937     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
938     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
939     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
940 
941     for (i=0; i<mbs; i++) {
942       rvals[0] = bs*(baij->rstartbs + i);
943       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
944       for (j=ai[i]; j<ai[i+1]; j++) {
945         col = (baij->cstartbs+aj[j])*bs;
946         for (k=0; k<bs; k++) {
947           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
948           col++;
949           a += bs;
950         }
951       }
952     }
953     /* copy over the B part */
954     Bloc = (Mat_SeqBAIJ*)baij->B->data;
955     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
956     for (i=0; i<mbs; i++) {
957 
958       rvals[0] = bs*(baij->rstartbs + i);
959       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
960       for (j=ai[i]; j<ai[i+1]; j++) {
961         col = baij->garray[aj[j]]*bs;
962         for (k=0; k<bs; k++) {
963           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
964           col++;
965           a += bs;
966         }
967       }
968     }
969     ierr = PetscFree(rvals);CHKERRQ(ierr);
970     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972     /*
973        Everyone has to call to draw the matrix since the graphics waits are
974        synchronized across all processors that share the PetscDraw object
975     */
976     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
977     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
978     if (!rank) {
979       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
980       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
981     }
982     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
983     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
984     ierr = MatDestroy(&A);CHKERRQ(ierr);
985   }
986   PetscFunctionReturn(0);
987 }
988 
989 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
990 {
991   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
992   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
993   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
994   PetscErrorCode ierr;
995   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
996   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
997   int            fd;
998   PetscScalar    *column_values;
999   FILE           *file;
1000   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1001   PetscInt       message_count,flowcontrolcount;
1002 
1003   PetscFunctionBegin;
1004   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1005   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1006   nz   = bs2*(A->nz + B->nz);
1007   rlen = mat->rmap->n;
1008   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1009   if (!rank) {
1010     header[0] = MAT_FILE_CLASSID;
1011     header[1] = mat->rmap->N;
1012     header[2] = mat->cmap->N;
1013 
1014     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1015     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1016     /* get largest number of rows any processor has */
1017     range = mat->rmap->range;
1018     for (i=1; i<size; i++) {
1019       rlen = PetscMax(rlen,range[i+1] - range[i]);
1020     }
1021   } else {
1022     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1023   }
1024 
1025   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1026   /* compute lengths of each row  */
1027   for (i=0; i<a->mbs; i++) {
1028     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1029   }
1030   /* store the row lengths to the file */
1031   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1032   if (!rank) {
1033     MPI_Status status;
1034     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1035     rlen = (range[1] - range[0])/bs;
1036     for (i=0; i<rlen; i++) {
1037       for (j=0; j<bs; j++) {
1038         row_lens[i*bs+j] = bs*crow_lens[i];
1039       }
1040     }
1041     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1042     for (i=1; i<size; i++) {
1043       rlen = (range[i+1] - range[i])/bs;
1044       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1045       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1046       for (k=0; k<rlen; k++) {
1047         for (j=0; j<bs; j++) {
1048           row_lens[k*bs+j] = bs*crow_lens[k];
1049         }
1050       }
1051       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1052     }
1053     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1054     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1055   } else {
1056     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1057     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1058     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1059   }
1060   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1061 
1062   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1063      information needed to make it for each row from a block row. This does require more communication but still not more than
1064      the communication needed for the nonzero values  */
1065   nzmax = nz; /*  space a largest processor needs */
1066   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1067   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1068   cnt   = 0;
1069   for (i=0; i<a->mbs; i++) {
1070     pcnt = cnt;
1071     for (j=B->i[i]; j<B->i[i+1]; j++) {
1072       if ((col = garray[B->j[j]]) > cstart) break;
1073       for (l=0; l<bs; l++) {
1074         column_indices[cnt++] = bs*col+l;
1075       }
1076     }
1077     for (k=A->i[i]; k<A->i[i+1]; k++) {
1078       for (l=0; l<bs; l++) {
1079         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1080       }
1081     }
1082     for (; j<B->i[i+1]; j++) {
1083       for (l=0; l<bs; l++) {
1084         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1085       }
1086     }
1087     len = cnt - pcnt;
1088     for (k=1; k<bs; k++) {
1089       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1090       cnt += len;
1091     }
1092   }
1093   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1094 
1095   /* store the columns to the file */
1096   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1097   if (!rank) {
1098     MPI_Status status;
1099     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1100     for (i=1; i<size; i++) {
1101       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1102       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1103       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1104       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1105     }
1106     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1107   } else {
1108     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1109     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1110     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1111     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1112   }
1113   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1114 
1115   /* load up the numerical values */
1116   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1117   cnt  = 0;
1118   for (i=0; i<a->mbs; i++) {
1119     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1120     for (j=B->i[i]; j<B->i[i+1]; j++) {
1121       if (garray[B->j[j]] > cstart) break;
1122       for (l=0; l<bs; l++) {
1123         for (ll=0; ll<bs; ll++) {
1124           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1125         }
1126       }
1127       cnt += bs;
1128     }
1129     for (k=A->i[i]; k<A->i[i+1]; k++) {
1130       for (l=0; l<bs; l++) {
1131         for (ll=0; ll<bs; ll++) {
1132           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1133         }
1134       }
1135       cnt += bs;
1136     }
1137     for (; j<B->i[i+1]; j++) {
1138       for (l=0; l<bs; l++) {
1139         for (ll=0; ll<bs; ll++) {
1140           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1141         }
1142       }
1143       cnt += bs;
1144     }
1145     cnt += (bs-1)*rlen;
1146   }
1147   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1148 
1149   /* store the column values to the file */
1150   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1151   if (!rank) {
1152     MPI_Status status;
1153     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1154     for (i=1; i<size; i++) {
1155       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1156       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1157       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1158       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1159     }
1160     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1161   } else {
1162     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1163     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1165     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1166   }
1167   ierr = PetscFree(column_values);CHKERRQ(ierr);
1168 
1169   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1170   if (file) {
1171     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1177 {
1178   PetscErrorCode ierr;
1179   PetscBool      iascii,isdraw,issocket,isbinary;
1180 
1181   PetscFunctionBegin;
1182   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1183   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1184   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1185   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1186   if (iascii || isdraw || issocket) {
1187     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1188   } else if (isbinary) {
1189     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1195 {
1196   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200 #if defined(PETSC_USE_LOG)
1201   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1202 #endif
1203   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1204   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1205   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1206   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1207 #if defined(PETSC_USE_CTABLE)
1208   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1209 #else
1210   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1211 #endif
1212   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1213   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1214   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1215   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1216   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1217   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1218   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1219   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1220   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1221   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1222   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1223   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1224   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1225   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1226   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1227 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1228   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1229 #endif
1230   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1231   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1232   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1233   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1234 
1235   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1240 #if defined(PETSC_HAVE_ELEMENTAL)
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1242 #endif
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1249 {
1250   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1251   PetscErrorCode    ierr;
1252   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1253   PetscScalar       *from;
1254   const PetscScalar *x;
1255 
1256   PetscFunctionBegin;
1257   /* diagonal part */
1258   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1259   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1260 
1261   /* subdiagonal part */
1262   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1263 
1264   /* copy x into the vec slvec0 */
1265   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1266   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1267 
1268   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1269   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1270   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1271 
1272   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1274   /* supperdiagonal part */
1275   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1280 {
1281   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1282   PetscErrorCode    ierr;
1283   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1284   PetscScalar       *from;
1285   const PetscScalar *x;
1286 
1287   PetscFunctionBegin;
1288   /* diagonal part */
1289   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1290   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1291 
1292   /* subdiagonal part */
1293   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1294 
1295   /* copy x into the vec slvec0 */
1296   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1297   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1298 
1299   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1300   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1301   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1302 
1303   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   /* supperdiagonal part */
1306   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314   PetscInt       nt;
1315 
1316   PetscFunctionBegin;
1317   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1318   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1319 
1320   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1321   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1322 
1323   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324   /* do diagonal part */
1325   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1326   /* do supperdiagonal part */
1327   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1329   /* do subdiagonal part */
1330   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1331   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1332   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1337 {
1338   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1339   PetscErrorCode    ierr;
1340   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1341   PetscScalar       *from,zero=0.0;
1342   const PetscScalar *x;
1343 
1344   PetscFunctionBegin;
1345   /* diagonal part */
1346   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1347   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1348 
1349   /* subdiagonal part */
1350   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1351 
1352   /* copy x into the vec slvec0 */
1353   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1354   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1355   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1356   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1357 
1358   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1360   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1361 
1362   /* supperdiagonal part */
1363   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1368 {
1369   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1370   PetscErrorCode    ierr;
1371   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1372   PetscScalar       *from,zero=0.0;
1373   const PetscScalar *x;
1374 
1375   PetscFunctionBegin;
1376   /* diagonal part */
1377   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1378   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1379 
1380   /* subdiagonal part */
1381   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1382 
1383   /* copy x into the vec slvec0 */
1384   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1385   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1386   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1387   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1388 
1389   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1390   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1391   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1392 
1393   /* supperdiagonal part */
1394   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1399 {
1400   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1405   /* do diagonal part */
1406   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1407   /* do supperdiagonal part */
1408   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1409   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1410 
1411   /* do subdiagonal part */
1412   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1413   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1414   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /*
1419   This only works correctly for square matrices where the subblock A->A is the
1420    diagonal block
1421 */
1422 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1434 {
1435   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1440   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1445 {
1446   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1447   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1448   PetscErrorCode ierr;
1449   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1450   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1451   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1452 
1453   PetscFunctionBegin;
1454   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1455   mat->getrowactive = PETSC_TRUE;
1456 
1457   if (!mat->rowvalues && (idx || v)) {
1458     /*
1459         allocate enough space to hold information from the longest row.
1460     */
1461     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1462     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1463     PetscInt     max = 1,mbs = mat->mbs,tmp;
1464     for (i=0; i<mbs; i++) {
1465       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1466       if (max < tmp) max = tmp;
1467     }
1468     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1469   }
1470 
1471   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1472   lrow = row - brstart;  /* local row index */
1473 
1474   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1475   if (!v)   {pvA = 0; pvB = 0;}
1476   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1477   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1478   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1479   nztot = nzA + nzB;
1480 
1481   cmap = mat->garray;
1482   if (v  || idx) {
1483     if (nztot) {
1484       /* Sort by increasing column numbers, assuming A and B already sorted */
1485       PetscInt imark = -1;
1486       if (v) {
1487         *v = v_p = mat->rowvalues;
1488         for (i=0; i<nzB; i++) {
1489           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1490           else break;
1491         }
1492         imark = i;
1493         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1494         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1495       }
1496       if (idx) {
1497         *idx = idx_p = mat->rowindices;
1498         if (imark > -1) {
1499           for (i=0; i<imark; i++) {
1500             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1501           }
1502         } else {
1503           for (i=0; i<nzB; i++) {
1504             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1505             else break;
1506           }
1507           imark = i;
1508         }
1509         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1510         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1511       }
1512     } else {
1513       if (idx) *idx = 0;
1514       if (v)   *v   = 0;
1515     }
1516   }
1517   *nz  = nztot;
1518   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1519   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1524 {
1525   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1526 
1527   PetscFunctionBegin;
1528   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1529   baij->getrowactive = PETSC_FALSE;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1534 {
1535   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1536   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1537 
1538   PetscFunctionBegin;
1539   aA->getrow_utriangular = PETSC_TRUE;
1540   PetscFunctionReturn(0);
1541 }
1542 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1543 {
1544   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1545   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1546 
1547   PetscFunctionBegin;
1548   aA->getrow_utriangular = PETSC_FALSE;
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1553 {
1554   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1559   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1564 {
1565   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1570   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1575    Input: isrow       - distributed(parallel),
1576           iscol_local - locally owned (seq)
1577 */
1578 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1579 {
1580   PetscErrorCode ierr;
1581   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1582   const PetscInt *ptr1,*ptr2;
1583 
1584   PetscFunctionBegin;
1585   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1586   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1587   if (sz1 > sz2) {
1588     *flg = PETSC_FALSE;
1589     PetscFunctionReturn(0);
1590   }
1591 
1592   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1593   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1594 
1595   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1596   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1597   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1598   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1599   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1600   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1601 
1602   nmatch=0;
1603   k     = 0;
1604   for (i=0; i<sz1; i++){
1605     for (j=k; j<sz2; j++){
1606       if (a1[i] == a2[j]) {
1607         k = j; nmatch++;
1608         break;
1609       }
1610     }
1611   }
1612   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1613   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1614   ierr = PetscFree(a1);CHKERRQ(ierr);
1615   ierr = PetscFree(a2);CHKERRQ(ierr);
1616   if (nmatch < sz1) {
1617     *flg = PETSC_FALSE;
1618   } else {
1619     *flg = PETSC_TRUE;
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1625 {
1626   PetscErrorCode ierr;
1627   IS             iscol_local;
1628   PetscInt       csize;
1629   PetscBool      isequal;
1630 
1631   PetscFunctionBegin;
1632   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1633   if (call == MAT_REUSE_MATRIX) {
1634     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1635     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1636   } else {
1637     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1638     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1639     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1640   }
1641 
1642   /* now call MatCreateSubMatrix_MPIBAIJ() */
1643   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1644   if (call == MAT_INITIAL_MATRIX) {
1645     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1646     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1652 {
1653   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1654   PetscErrorCode ierr;
1655 
1656   PetscFunctionBegin;
1657   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1658   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1663 {
1664   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1665   Mat            A  = a->A,B = a->B;
1666   PetscErrorCode ierr;
1667   PetscLogDouble isend[5],irecv[5];
1668 
1669   PetscFunctionBegin;
1670   info->block_size = (PetscReal)matin->rmap->bs;
1671 
1672   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1673 
1674   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1675   isend[3] = info->memory;  isend[4] = info->mallocs;
1676 
1677   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1678 
1679   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1680   isend[3] += info->memory;  isend[4] += info->mallocs;
1681   if (flag == MAT_LOCAL) {
1682     info->nz_used      = isend[0];
1683     info->nz_allocated = isend[1];
1684     info->nz_unneeded  = isend[2];
1685     info->memory       = isend[3];
1686     info->mallocs      = isend[4];
1687   } else if (flag == MAT_GLOBAL_MAX) {
1688     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1689 
1690     info->nz_used      = irecv[0];
1691     info->nz_allocated = irecv[1];
1692     info->nz_unneeded  = irecv[2];
1693     info->memory       = irecv[3];
1694     info->mallocs      = irecv[4];
1695   } else if (flag == MAT_GLOBAL_SUM) {
1696     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1697 
1698     info->nz_used      = irecv[0];
1699     info->nz_allocated = irecv[1];
1700     info->nz_unneeded  = irecv[2];
1701     info->memory       = irecv[3];
1702     info->mallocs      = irecv[4];
1703   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1704   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1705   info->fill_ratio_needed = 0;
1706   info->factor_mallocs    = 0;
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1711 {
1712   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1713   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   switch (op) {
1718   case MAT_NEW_NONZERO_LOCATIONS:
1719   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1720   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1721   case MAT_KEEP_NONZERO_PATTERN:
1722   case MAT_SUBMAT_SINGLEIS:
1723   case MAT_NEW_NONZERO_LOCATION_ERR:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1727     break;
1728   case MAT_ROW_ORIENTED:
1729     MatCheckPreallocated(A,1);
1730     a->roworiented = flg;
1731 
1732     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1733     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_NEW_DIAGONALS:
1736   case MAT_SORTED_FULL:
1737     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1738     break;
1739   case MAT_IGNORE_OFF_PROC_ENTRIES:
1740     a->donotstash = flg;
1741     break;
1742   case MAT_USE_HASH_TABLE:
1743     a->ht_flag = flg;
1744     break;
1745   case MAT_HERMITIAN:
1746     MatCheckPreallocated(A,1);
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748 #if defined(PETSC_USE_COMPLEX)
1749     if (flg) { /* need different mat-vec ops */
1750       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1751       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1752       A->ops->multtranspose    = NULL;
1753       A->ops->multtransposeadd = NULL;
1754       A->symmetric = PETSC_FALSE;
1755     }
1756 #endif
1757     break;
1758   case MAT_SPD:
1759   case MAT_SYMMETRIC:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762 #if defined(PETSC_USE_COMPLEX)
1763     if (flg) { /* restore to use default mat-vec ops */
1764       A->ops->mult             = MatMult_MPISBAIJ;
1765       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1766       A->ops->multtranspose    = MatMult_MPISBAIJ;
1767       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1768     }
1769 #endif
1770     break;
1771   case MAT_STRUCTURALLY_SYMMETRIC:
1772     MatCheckPreallocated(A,1);
1773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1774     break;
1775   case MAT_SYMMETRY_ETERNAL:
1776     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1777     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1778     break;
1779   case MAT_IGNORE_LOWER_TRIANGULAR:
1780     aA->ignore_ltriangular = flg;
1781     break;
1782   case MAT_ERROR_LOWER_TRIANGULAR:
1783     aA->ignore_ltriangular = flg;
1784     break;
1785   case MAT_GETROW_UPPERTRIANGULAR:
1786     aA->getrow_utriangular = flg;
1787     break;
1788   default:
1789     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1790   }
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1795 {
1796   PetscErrorCode ierr;
1797 
1798   PetscFunctionBegin;
1799   if (reuse == MAT_INITIAL_MATRIX) {
1800     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1801   }  else if (reuse == MAT_REUSE_MATRIX) {
1802     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1808 {
1809   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1810   Mat            a     = baij->A, b=baij->B;
1811   PetscErrorCode ierr;
1812   PetscInt       nv,m,n;
1813   PetscBool      flg;
1814 
1815   PetscFunctionBegin;
1816   if (ll != rr) {
1817     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1818     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1819   }
1820   if (!ll) PetscFunctionReturn(0);
1821 
1822   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1823   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1824 
1825   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1826   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1827 
1828   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1829 
1830   /* left diagonalscale the off-diagonal part */
1831   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1832 
1833   /* scale the diagonal part */
1834   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1835 
1836   /* right diagonalscale the off-diagonal part */
1837   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1838   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1843 {
1844   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1853 
1854 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1855 {
1856   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1857   Mat            a,b,c,d;
1858   PetscBool      flg;
1859   PetscErrorCode ierr;
1860 
1861   PetscFunctionBegin;
1862   a = matA->A; b = matA->B;
1863   c = matB->A; d = matB->B;
1864 
1865   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1866   if (flg) {
1867     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1868   }
1869   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1874 {
1875   PetscErrorCode ierr;
1876   PetscBool      isbaij;
1877 
1878   PetscFunctionBegin;
1879   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1880   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1881   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1882   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1883     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1884     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1885     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1886   } else {
1887     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1888     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1889 
1890     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1891     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1892   }
1893   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1898 {
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1907 {
1908   PetscErrorCode ierr;
1909   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1910   PetscBLASInt   bnz,one=1;
1911   Mat_SeqSBAIJ   *xa,*ya;
1912   Mat_SeqBAIJ    *xb,*yb;
1913 
1914   PetscFunctionBegin;
1915   if (str == SAME_NONZERO_PATTERN) {
1916     PetscScalar alpha = a;
1917     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1918     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1919     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1920     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1921     xb   = (Mat_SeqBAIJ*)xx->B->data;
1922     yb   = (Mat_SeqBAIJ*)yy->B->data;
1923     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1924     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1925     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1926   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1927     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1928     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1929     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1930   } else {
1931     Mat      B;
1932     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1933     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1934     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1935     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1936     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1937     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1938     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1939     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1940     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1941     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1942     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1943     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1944     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1945     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1946     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1947     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1948     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1949     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1950     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1951     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1952   }
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1957 {
1958   PetscErrorCode ierr;
1959   PetscInt       i;
1960   PetscBool      flg;
1961 
1962   PetscFunctionBegin;
1963   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1964   for (i=0; i<n; i++) {
1965     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1966     if (!flg) {
1967       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1968     }
1969   }
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1974 {
1975   PetscErrorCode ierr;
1976   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1977   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1978 
1979   PetscFunctionBegin;
1980   if (!Y->preallocated) {
1981     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1982   } else if (!aij->nz) {
1983     PetscInt nonew = aij->nonew;
1984     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1985     aij->nonew = nonew;
1986   }
1987   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1992 {
1993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1998   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1999   if (d) {
2000     PetscInt rstart;
2001     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2002     *d += rstart/A->rmap->bs;
2003 
2004   }
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
2009 {
2010   PetscFunctionBegin;
2011   *a = ((Mat_MPISBAIJ*)A->data)->A;
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 /* -------------------------------------------------------------------*/
2016 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
2017                                        MatGetRow_MPISBAIJ,
2018                                        MatRestoreRow_MPISBAIJ,
2019                                        MatMult_MPISBAIJ,
2020                                /*  4*/ MatMultAdd_MPISBAIJ,
2021                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
2022                                        MatMultAdd_MPISBAIJ,
2023                                        0,
2024                                        0,
2025                                        0,
2026                                /* 10*/ 0,
2027                                        0,
2028                                        0,
2029                                        MatSOR_MPISBAIJ,
2030                                        MatTranspose_MPISBAIJ,
2031                                /* 15*/ MatGetInfo_MPISBAIJ,
2032                                        MatEqual_MPISBAIJ,
2033                                        MatGetDiagonal_MPISBAIJ,
2034                                        MatDiagonalScale_MPISBAIJ,
2035                                        MatNorm_MPISBAIJ,
2036                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
2037                                        MatAssemblyEnd_MPISBAIJ,
2038                                        MatSetOption_MPISBAIJ,
2039                                        MatZeroEntries_MPISBAIJ,
2040                                /* 24*/ 0,
2041                                        0,
2042                                        0,
2043                                        0,
2044                                        0,
2045                                /* 29*/ MatSetUp_MPISBAIJ,
2046                                        0,
2047                                        0,
2048                                        MatGetDiagonalBlock_MPISBAIJ,
2049                                        0,
2050                                /* 34*/ MatDuplicate_MPISBAIJ,
2051                                        0,
2052                                        0,
2053                                        0,
2054                                        0,
2055                                /* 39*/ MatAXPY_MPISBAIJ,
2056                                        MatCreateSubMatrices_MPISBAIJ,
2057                                        MatIncreaseOverlap_MPISBAIJ,
2058                                        MatGetValues_MPISBAIJ,
2059                                        MatCopy_MPISBAIJ,
2060                                /* 44*/ 0,
2061                                        MatScale_MPISBAIJ,
2062                                        MatShift_MPISBAIJ,
2063                                        0,
2064                                        0,
2065                                /* 49*/ 0,
2066                                        0,
2067                                        0,
2068                                        0,
2069                                        0,
2070                                /* 54*/ 0,
2071                                        0,
2072                                        MatSetUnfactored_MPISBAIJ,
2073                                        0,
2074                                        MatSetValuesBlocked_MPISBAIJ,
2075                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2076                                        0,
2077                                        0,
2078                                        0,
2079                                        0,
2080                                /* 64*/ 0,
2081                                        0,
2082                                        0,
2083                                        0,
2084                                        0,
2085                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2086                                        0,
2087                                        MatConvert_MPISBAIJ_Basic,
2088                                        0,
2089                                        0,
2090                                /* 74*/ 0,
2091                                        0,
2092                                        0,
2093                                        0,
2094                                        0,
2095                                /* 79*/ 0,
2096                                        0,
2097                                        0,
2098                                        0,
2099                                        MatLoad_MPISBAIJ,
2100                                /* 84*/ 0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                        0,
2105                                /* 89*/ 0,
2106                                        0,
2107                                        0,
2108                                        0,
2109                                        0,
2110                                /* 94*/ 0,
2111                                        0,
2112                                        0,
2113                                        0,
2114                                        0,
2115                                /* 99*/ 0,
2116                                        0,
2117                                        0,
2118                                        0,
2119                                        0,
2120                                /*104*/ 0,
2121                                        MatRealPart_MPISBAIJ,
2122                                        MatImaginaryPart_MPISBAIJ,
2123                                        MatGetRowUpperTriangular_MPISBAIJ,
2124                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2125                                /*109*/ 0,
2126                                        0,
2127                                        0,
2128                                        0,
2129                                        MatMissingDiagonal_MPISBAIJ,
2130                                /*114*/ 0,
2131                                        0,
2132                                        0,
2133                                        0,
2134                                        0,
2135                                /*119*/ 0,
2136                                        0,
2137                                        0,
2138                                        0,
2139                                        0,
2140                                /*124*/ 0,
2141                                        0,
2142                                        0,
2143                                        0,
2144                                        0,
2145                                /*129*/ 0,
2146                                        0,
2147                                        0,
2148                                        0,
2149                                        0,
2150                                /*134*/ 0,
2151                                        0,
2152                                        0,
2153                                        0,
2154                                        0,
2155                                /*139*/ MatSetBlockSizes_Default,
2156                                        0,
2157                                        0,
2158                                        0,
2159                                        0,
2160                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2161 };
2162 
2163 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2164 {
2165   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
2166   PetscErrorCode ierr;
2167   PetscInt       i,mbs,Mbs;
2168   PetscMPIInt    size;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2172   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2173   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2174   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2175   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
2176   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
2177 
2178   mbs = B->rmap->n/bs;
2179   Mbs = B->rmap->N/bs;
2180   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2181 
2182   B->rmap->bs = bs;
2183   b->bs2      = bs*bs;
2184   b->mbs      = mbs;
2185   b->Mbs      = Mbs;
2186   b->nbs      = B->cmap->n/bs;
2187   b->Nbs      = B->cmap->N/bs;
2188 
2189   for (i=0; i<=b->size; i++) {
2190     b->rangebs[i] = B->rmap->range[i]/bs;
2191   }
2192   b->rstartbs = B->rmap->rstart/bs;
2193   b->rendbs   = B->rmap->rend/bs;
2194 
2195   b->cstartbs = B->cmap->rstart/bs;
2196   b->cendbs   = B->cmap->rend/bs;
2197 
2198 #if defined(PETSC_USE_CTABLE)
2199   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2200 #else
2201   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2202 #endif
2203   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2204   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2205   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2206   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2207   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2208   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2209   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2210   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2211   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2212 
2213   /* Because the B will have been resized we simply destroy it and create a new one each time */
2214   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2215   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2216   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2217   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2218   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2219   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2220 
2221   if (!B->preallocated) {
2222     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2223     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2224     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2225     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2226     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2227   }
2228 
2229   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2230   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2231 
2232   B->preallocated  = PETSC_TRUE;
2233   B->was_assembled = PETSC_FALSE;
2234   B->assembled     = PETSC_FALSE;
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2239 {
2240   PetscInt       m,rstart,cend;
2241   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2242   const PetscInt *JJ    =0;
2243   PetscScalar    *values=0;
2244   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2245   PetscErrorCode ierr;
2246   PetscBool      nooffprocentries;
2247 
2248   PetscFunctionBegin;
2249   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2250   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2251   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2252   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2253   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2254   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2255   m      = B->rmap->n/bs;
2256   rstart = B->rmap->rstart/bs;
2257   cend   = B->cmap->rend/bs;
2258 
2259   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2260   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2261   for (i=0; i<m; i++) {
2262     nz = ii[i+1] - ii[i];
2263     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2264     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2265     JJ     = jj + ii[i];
2266     bd     = 0;
2267     for (j=0; j<nz; j++) {
2268       if (*JJ >= i + rstart) break;
2269       JJ++;
2270       bd++;
2271     }
2272     d  = 0;
2273     for (; j<nz; j++) {
2274       if (*JJ++ >= cend) break;
2275       d++;
2276     }
2277     d_nnz[i] = d;
2278     o_nnz[i] = nz - d - bd;
2279     nz       = nz - bd;
2280     nz_max = PetscMax(nz_max,nz);
2281   }
2282   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2283   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2284   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2285 
2286   values = (PetscScalar*)V;
2287   if (!values) {
2288     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2289   }
2290   for (i=0; i<m; i++) {
2291     PetscInt          row    = i + rstart;
2292     PetscInt          ncols  = ii[i+1] - ii[i];
2293     const PetscInt    *icols = jj + ii[i];
2294     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2295       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2296       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2297     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2298       PetscInt j;
2299       for (j=0; j<ncols; j++) {
2300         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2301         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2302       }
2303     }
2304   }
2305 
2306   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2307   nooffprocentries    = B->nooffprocentries;
2308   B->nooffprocentries = PETSC_TRUE;
2309   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2310   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2311   B->nooffprocentries = nooffprocentries;
2312 
2313   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /*MC
2318    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2319    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2320    the matrix is stored.
2321 
2322    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2323    can call MatSetOption(Mat, MAT_HERMITIAN);
2324 
2325    Options Database Keys:
2326 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2327 
2328    Notes:
2329      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2330      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2331 
2332    Level: beginner
2333 
2334 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType
2335 M*/
2336 
2337 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2338 {
2339   Mat_MPISBAIJ   *b;
2340   PetscErrorCode ierr;
2341   PetscBool      flg = PETSC_FALSE;
2342 
2343   PetscFunctionBegin;
2344   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2345   B->data = (void*)b;
2346   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2347 
2348   B->ops->destroy = MatDestroy_MPISBAIJ;
2349   B->ops->view    = MatView_MPISBAIJ;
2350   B->assembled    = PETSC_FALSE;
2351   B->insertmode   = NOT_SET_VALUES;
2352 
2353   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2354   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2355 
2356   /* build local table of row and column ownerships */
2357   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2358 
2359   /* build cache for off array entries formed */
2360   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2361 
2362   b->donotstash  = PETSC_FALSE;
2363   b->colmap      = NULL;
2364   b->garray      = NULL;
2365   b->roworiented = PETSC_TRUE;
2366 
2367   /* stuff used in block assembly */
2368   b->barray = 0;
2369 
2370   /* stuff used for matrix vector multiply */
2371   b->lvec    = 0;
2372   b->Mvctx   = 0;
2373   b->slvec0  = 0;
2374   b->slvec0b = 0;
2375   b->slvec1  = 0;
2376   b->slvec1a = 0;
2377   b->slvec1b = 0;
2378   b->sMvctx  = 0;
2379 
2380   /* stuff for MatGetRow() */
2381   b->rowindices   = 0;
2382   b->rowvalues    = 0;
2383   b->getrowactive = PETSC_FALSE;
2384 
2385   /* hash table stuff */
2386   b->ht           = 0;
2387   b->hd           = 0;
2388   b->ht_size      = 0;
2389   b->ht_flag      = PETSC_FALSE;
2390   b->ht_fact      = 0;
2391   b->ht_total_ct  = 0;
2392   b->ht_insert_ct = 0;
2393 
2394   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2395   b->ijonly = PETSC_FALSE;
2396 
2397   b->in_loc = 0;
2398   b->v_loc  = 0;
2399   b->n_loc  = 0;
2400 
2401   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2402   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2404   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2405 #if defined(PETSC_HAVE_ELEMENTAL)
2406   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2407 #endif
2408   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2410 
2411   B->symmetric                  = PETSC_TRUE;
2412   B->structurally_symmetric     = PETSC_TRUE;
2413   B->symmetric_set              = PETSC_TRUE;
2414   B->structurally_symmetric_set = PETSC_TRUE;
2415   B->symmetric_eternal          = PETSC_TRUE;
2416 #if defined(PETSC_USE_COMPLEX)
2417   B->hermitian                  = PETSC_FALSE;
2418   B->hermitian_set              = PETSC_FALSE;
2419 #else
2420   B->hermitian                  = PETSC_TRUE;
2421   B->hermitian_set              = PETSC_TRUE;
2422 #endif
2423 
2424   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2425   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2426   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2427   if (flg) {
2428     PetscReal fact = 1.39;
2429     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2430     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2431     if (fact <= 1.0) fact = 1.39;
2432     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2433     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2434   }
2435   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 /*MC
2440    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2441 
2442    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2443    and MATMPISBAIJ otherwise.
2444 
2445    Options Database Keys:
2446 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2447 
2448   Level: beginner
2449 
2450 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2451 M*/
2452 
2453 /*@C
2454    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2455    the user should preallocate the matrix storage by setting the parameters
2456    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2457    performance can be increased by more than a factor of 50.
2458 
2459    Collective on Mat
2460 
2461    Input Parameters:
2462 +  B - the matrix
2463 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2464           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2465 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2466            submatrix  (same for all local rows)
2467 .  d_nnz - array containing the number of block nonzeros in the various block rows
2468            in the upper triangular and diagonal part of the in diagonal portion of the local
2469            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2470            for the diagonal entry and set a value even if it is zero.
2471 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2472            submatrix (same for all local rows).
2473 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2474            off-diagonal portion of the local submatrix that is right of the diagonal
2475            (possibly different for each block row) or NULL.
2476 
2477 
2478    Options Database Keys:
2479 +   -mat_no_unroll - uses code that does not unroll the loops in the
2480                      block calculations (much slower)
2481 -   -mat_block_size - size of the blocks to use
2482 
2483    Notes:
2484 
2485    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2486    than it must be used on all processors that share the object for that argument.
2487 
2488    If the *_nnz parameter is given then the *_nz parameter is ignored
2489 
2490    Storage Information:
2491    For a square global matrix we define each processor's diagonal portion
2492    to be its local rows and the corresponding columns (a square submatrix);
2493    each processor's off-diagonal portion encompasses the remainder of the
2494    local matrix (a rectangular submatrix).
2495 
2496    The user can specify preallocated storage for the diagonal part of
2497    the local submatrix with either d_nz or d_nnz (not both).  Set
2498    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2499    memory allocation.  Likewise, specify preallocated storage for the
2500    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2501 
2502    You can call MatGetInfo() to get information on how effective the preallocation was;
2503    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2504    You can also run with the option -info and look for messages with the string
2505    malloc in them to see if additional memory allocation was needed.
2506 
2507    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2508    the figure below we depict these three local rows and all columns (0-11).
2509 
2510 .vb
2511            0 1 2 3 4 5 6 7 8 9 10 11
2512           --------------------------
2513    row 3  |. . . d d d o o o o  o  o
2514    row 4  |. . . d d d o o o o  o  o
2515    row 5  |. . . d d d o o o o  o  o
2516           --------------------------
2517 .ve
2518 
2519    Thus, any entries in the d locations are stored in the d (diagonal)
2520    submatrix, and any entries in the o locations are stored in the
2521    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2522    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2523 
2524    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2525    plus the diagonal part of the d matrix,
2526    and o_nz should indicate the number of block nonzeros per row in the o matrix
2527 
2528    In general, for PDE problems in which most nonzeros are near the diagonal,
2529    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2530    or you will get TERRIBLE performance; see the users' manual chapter on
2531    matrices.
2532 
2533    Level: intermediate
2534 
2535 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2536 @*/
2537 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2538 {
2539   PetscErrorCode ierr;
2540 
2541   PetscFunctionBegin;
2542   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2543   PetscValidType(B,1);
2544   PetscValidLogicalCollectiveInt(B,bs,2);
2545   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 /*@C
2550    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2551    (block compressed row).  For good matrix assembly performance
2552    the user should preallocate the matrix storage by setting the parameters
2553    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2554    performance can be increased by more than a factor of 50.
2555 
2556    Collective
2557 
2558    Input Parameters:
2559 +  comm - MPI communicator
2560 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2561           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2562 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2563            This value should be the same as the local size used in creating the
2564            y vector for the matrix-vector product y = Ax.
2565 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2566            This value should be the same as the local size used in creating the
2567            x vector for the matrix-vector product y = Ax.
2568 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2569 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2570 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2571            submatrix  (same for all local rows)
2572 .  d_nnz - array containing the number of block nonzeros in the various block rows
2573            in the upper triangular portion of the in diagonal portion of the local
2574            (possibly different for each block block row) or NULL.
2575            If you plan to factor the matrix you must leave room for the diagonal entry and
2576            set its value even if it is zero.
2577 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2578            submatrix (same for all local rows).
2579 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2580            off-diagonal portion of the local submatrix (possibly different for
2581            each block row) or NULL.
2582 
2583    Output Parameter:
2584 .  A - the matrix
2585 
2586    Options Database Keys:
2587 +   -mat_no_unroll - uses code that does not unroll the loops in the
2588                      block calculations (much slower)
2589 .   -mat_block_size - size of the blocks to use
2590 -   -mat_mpi - use the parallel matrix data structures even on one processor
2591                (defaults to using SeqBAIJ format on one processor)
2592 
2593    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2594    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2595    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2596 
2597    Notes:
2598    The number of rows and columns must be divisible by blocksize.
2599    This matrix type does not support complex Hermitian operation.
2600 
2601    The user MUST specify either the local or global matrix dimensions
2602    (possibly both).
2603 
2604    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2605    than it must be used on all processors that share the object for that argument.
2606 
2607    If the *_nnz parameter is given then the *_nz parameter is ignored
2608 
2609    Storage Information:
2610    For a square global matrix we define each processor's diagonal portion
2611    to be its local rows and the corresponding columns (a square submatrix);
2612    each processor's off-diagonal portion encompasses the remainder of the
2613    local matrix (a rectangular submatrix).
2614 
2615    The user can specify preallocated storage for the diagonal part of
2616    the local submatrix with either d_nz or d_nnz (not both).  Set
2617    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2618    memory allocation.  Likewise, specify preallocated storage for the
2619    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2620 
2621    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2622    the figure below we depict these three local rows and all columns (0-11).
2623 
2624 .vb
2625            0 1 2 3 4 5 6 7 8 9 10 11
2626           --------------------------
2627    row 3  |. . . d d d o o o o  o  o
2628    row 4  |. . . d d d o o o o  o  o
2629    row 5  |. . . d d d o o o o  o  o
2630           --------------------------
2631 .ve
2632 
2633    Thus, any entries in the d locations are stored in the d (diagonal)
2634    submatrix, and any entries in the o locations are stored in the
2635    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2636    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2637 
2638    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2639    plus the diagonal part of the d matrix,
2640    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2641    In general, for PDE problems in which most nonzeros are near the diagonal,
2642    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2643    or you will get TERRIBLE performance; see the users' manual chapter on
2644    matrices.
2645 
2646    Level: intermediate
2647 
2648 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2649 @*/
2650 
2651 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2652 {
2653   PetscErrorCode ierr;
2654   PetscMPIInt    size;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2658   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2659   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2660   if (size > 1) {
2661     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2662     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2663   } else {
2664     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2665     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 
2671 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2672 {
2673   Mat            mat;
2674   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2675   PetscErrorCode ierr;
2676   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2677   PetscScalar    *array;
2678 
2679   PetscFunctionBegin;
2680   *newmat = 0;
2681 
2682   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2683   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2684   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2685   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2686   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2687 
2688   mat->factortype   = matin->factortype;
2689   mat->preallocated = PETSC_TRUE;
2690   mat->assembled    = PETSC_TRUE;
2691   mat->insertmode   = NOT_SET_VALUES;
2692 
2693   a      = (Mat_MPISBAIJ*)mat->data;
2694   a->bs2 = oldmat->bs2;
2695   a->mbs = oldmat->mbs;
2696   a->nbs = oldmat->nbs;
2697   a->Mbs = oldmat->Mbs;
2698   a->Nbs = oldmat->Nbs;
2699 
2700   a->size         = oldmat->size;
2701   a->rank         = oldmat->rank;
2702   a->donotstash   = oldmat->donotstash;
2703   a->roworiented  = oldmat->roworiented;
2704   a->rowindices   = 0;
2705   a->rowvalues    = 0;
2706   a->getrowactive = PETSC_FALSE;
2707   a->barray       = 0;
2708   a->rstartbs     = oldmat->rstartbs;
2709   a->rendbs       = oldmat->rendbs;
2710   a->cstartbs     = oldmat->cstartbs;
2711   a->cendbs       = oldmat->cendbs;
2712 
2713   /* hash table stuff */
2714   a->ht           = 0;
2715   a->hd           = 0;
2716   a->ht_size      = 0;
2717   a->ht_flag      = oldmat->ht_flag;
2718   a->ht_fact      = oldmat->ht_fact;
2719   a->ht_total_ct  = 0;
2720   a->ht_insert_ct = 0;
2721 
2722   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2723   if (oldmat->colmap) {
2724 #if defined(PETSC_USE_CTABLE)
2725     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2726 #else
2727     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2728     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2729     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2730 #endif
2731   } else a->colmap = 0;
2732 
2733   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2734     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2735     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2736     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2737   } else a->garray = 0;
2738 
2739   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2740   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2741   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2742   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2743   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2744 
2745   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2746   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2747   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2748   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2749 
2750   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2751   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2752   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2753   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2754   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2755   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2756   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2757   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2758   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2759   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2760   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2761   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2762   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2763 
2764   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2765   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2766   a->sMvctx = oldmat->sMvctx;
2767   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2768 
2769   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2770   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2771   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2772   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2773   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2774   *newmat = mat;
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2779 {
2780   PetscErrorCode ierr;
2781   PetscInt       i,nz,j,rstart,rend;
2782   PetscScalar    *vals,*buf;
2783   MPI_Comm       comm;
2784   MPI_Status     status;
2785   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2786   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2787   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2788   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2789   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2790   PetscInt       dcount,kmax,k,nzcount,tmp;
2791   int            fd;
2792   PetscBool      isbinary;
2793 
2794   PetscFunctionBegin;
2795   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2796   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2797 
2798   /* force binary viewer to load .info file if it has not yet done so */
2799   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2800   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2801   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2802   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2803   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2804   if (bs < 0) bs = 1;
2805 
2806   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2807   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2808   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2809   if (!rank) {
2810     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2811     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2812     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2813   }
2814 
2815   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2816   M    = header[1];
2817   N    = header[2];
2818 
2819   /* If global sizes are set, check if they are consistent with that given in the file */
2820   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2821   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2822 
2823   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2824 
2825   /*
2826      This code adds extra rows to make sure the number of rows is
2827      divisible by the blocksize
2828   */
2829   Mbs        = M/bs;
2830   extra_rows = bs - M + bs*(Mbs);
2831   if (extra_rows == bs) extra_rows = 0;
2832   else                  Mbs++;
2833   if (extra_rows &&!rank) {
2834     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2835   }
2836 
2837   /* determine ownership of all rows */
2838   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2839     mbs = Mbs/size + ((Mbs % size) > rank);
2840     m   = mbs*bs;
2841   } else { /* User Set */
2842     m   = newmat->rmap->n;
2843     mbs = m/bs;
2844   }
2845   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2846   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2847   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2848   rowners[0] = 0;
2849   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2850   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2851   rstart = rowners[rank];
2852   rend   = rowners[rank+1];
2853 
2854   /* distribute row lengths to all processors */
2855   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2856   if (!rank) {
2857     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2858     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
2859     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2860     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2861     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2862     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2863     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2864   } else {
2865     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2866   }
2867 
2868   if (!rank) {   /* procs[0] */
2869     /* calculate the number of nonzeros on each processor */
2870     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2871     for (i=0; i<size; i++) {
2872       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2873         procsnz[i] += rowlengths[j];
2874       }
2875     }
2876     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2877 
2878     /* determine max buffer needed and allocate it */
2879     maxnz = 0;
2880     for (i=0; i<size; i++) {
2881       maxnz = PetscMax(maxnz,procsnz[i]);
2882     }
2883     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2884 
2885     /* read in my part of the matrix column indices  */
2886     nz     = procsnz[0];
2887     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2888     mycols = ibuf;
2889     if (size == 1) nz -= extra_rows;
2890     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2891     if (size == 1) {
2892       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2893     }
2894 
2895     /* read in every ones (except the last) and ship off */
2896     for (i=1; i<size-1; i++) {
2897       nz   = procsnz[i];
2898       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2899       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2900     }
2901     /* read in the stuff for the last proc */
2902     if (size != 1) {
2903       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2904       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2905       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2906       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2907     }
2908     ierr = PetscFree(cols);CHKERRQ(ierr);
2909   } else {  /* procs[i], i>0 */
2910     /* determine buffer space needed for message */
2911     nz = 0;
2912     for (i=0; i<m; i++) nz += locrowlens[i];
2913     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2914     mycols = ibuf;
2915     /* receive message of column indices*/
2916     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2917     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2918     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2919   }
2920 
2921   /* loop over local rows, determining number of off diagonal entries */
2922   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2923   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2924   rowcount = 0;
2925   nzcount  = 0;
2926   for (i=0; i<mbs; i++) {
2927     dcount  = 0;
2928     odcount = 0;
2929     for (j=0; j<bs; j++) {
2930       kmax = locrowlens[rowcount];
2931       for (k=0; k<kmax; k++) {
2932         tmp = mycols[nzcount++]/bs; /* block col. index */
2933         if (!mask[tmp]) {
2934           mask[tmp] = 1;
2935           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2936           else masked1[dcount++] = tmp; /* entry in diag portion */
2937         }
2938       }
2939       rowcount++;
2940     }
2941 
2942     dlens[i]  = dcount;  /* d_nzz[i] */
2943     odlens[i] = odcount; /* o_nzz[i] */
2944 
2945     /* zero out the mask elements we set */
2946     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2947     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2948   }
2949   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2950   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2951   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2952 
2953   if (!rank) {
2954     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2955     /* read in my part of the matrix numerical values  */
2956     nz     = procsnz[0];
2957     vals   = buf;
2958     mycols = ibuf;
2959     if (size == 1) nz -= extra_rows;
2960     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2961     if (size == 1) {
2962       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2963     }
2964 
2965     /* insert into matrix */
2966     jj = rstart*bs;
2967     for (i=0; i<m; i++) {
2968       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2969       mycols += locrowlens[i];
2970       vals   += locrowlens[i];
2971       jj++;
2972     }
2973 
2974     /* read in other processors (except the last one) and ship out */
2975     for (i=1; i<size-1; i++) {
2976       nz   = procsnz[i];
2977       vals = buf;
2978       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2979       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2980     }
2981     /* the last proc */
2982     if (size != 1) {
2983       nz   = procsnz[i] - extra_rows;
2984       vals = buf;
2985       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2986       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2987       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2988     }
2989     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2990 
2991   } else {
2992     /* receive numeric values */
2993     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2994 
2995     /* receive message of values*/
2996     vals   = buf;
2997     mycols = ibuf;
2998     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2999     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3000     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3001 
3002     /* insert into matrix */
3003     jj = rstart*bs;
3004     for (i=0; i<m; i++) {
3005       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3006       mycols += locrowlens[i];
3007       vals   += locrowlens[i];
3008       jj++;
3009     }
3010   }
3011 
3012   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3013   ierr = PetscFree(buf);CHKERRQ(ierr);
3014   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3015   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3016   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3017   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3018   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3019   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 /*XXXXX@
3024    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3025 
3026    Input Parameters:
3027 .  mat  - the matrix
3028 .  fact - factor
3029 
3030    Not Collective on Mat, each process can have a different hash factor
3031 
3032    Level: advanced
3033 
3034   Notes:
3035    This can also be set by the command line option: -mat_use_hash_table fact
3036 
3037 .seealso: MatSetOption()
3038 @XXXXX*/
3039 
3040 
3041 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
3042 {
3043   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
3044   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
3045   PetscReal      atmp;
3046   PetscReal      *work,*svalues,*rvalues;
3047   PetscErrorCode ierr;
3048   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
3049   PetscMPIInt    rank,size;
3050   PetscInt       *rowners_bs,dest,count,source;
3051   PetscScalar    *va;
3052   MatScalar      *ba;
3053   MPI_Status     stat;
3054 
3055   PetscFunctionBegin;
3056   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3057   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
3058   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3059 
3060   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3061   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3062 
3063   bs  = A->rmap->bs;
3064   mbs = a->mbs;
3065   Mbs = a->Mbs;
3066   ba  = b->a;
3067   bi  = b->i;
3068   bj  = b->j;
3069 
3070   /* find ownerships */
3071   rowners_bs = A->rmap->range;
3072 
3073   /* each proc creates an array to be distributed */
3074   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3075 
3076   /* row_max for B */
3077   if (rank != size-1) {
3078     for (i=0; i<mbs; i++) {
3079       ncols = bi[1] - bi[0]; bi++;
3080       brow  = bs*i;
3081       for (j=0; j<ncols; j++) {
3082         bcol = bs*(*bj);
3083         for (kcol=0; kcol<bs; kcol++) {
3084           col  = bcol + kcol;                /* local col index */
3085           col += rowners_bs[rank+1];      /* global col index */
3086           for (krow=0; krow<bs; krow++) {
3087             atmp = PetscAbsScalar(*ba); ba++;
3088             row  = brow + krow;   /* local row index */
3089             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3090             if (work[col] < atmp) work[col] = atmp;
3091           }
3092         }
3093         bj++;
3094       }
3095     }
3096 
3097     /* send values to its owners */
3098     for (dest=rank+1; dest<size; dest++) {
3099       svalues = work + rowners_bs[dest];
3100       count   = rowners_bs[dest+1]-rowners_bs[dest];
3101       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3102     }
3103   }
3104 
3105   /* receive values */
3106   if (rank) {
3107     rvalues = work;
3108     count   = rowners_bs[rank+1]-rowners_bs[rank];
3109     for (source=0; source<rank; source++) {
3110       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3111       /* process values */
3112       for (i=0; i<count; i++) {
3113         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3114       }
3115     }
3116   }
3117 
3118   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3119   ierr = PetscFree(work);CHKERRQ(ierr);
3120   PetscFunctionReturn(0);
3121 }
3122 
3123 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3124 {
3125   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3126   PetscErrorCode    ierr;
3127   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3128   PetscScalar       *x,*ptr,*from;
3129   Vec               bb1;
3130   const PetscScalar *b;
3131 
3132   PetscFunctionBegin;
3133   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3134   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3135 
3136   if (flag == SOR_APPLY_UPPER) {
3137     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3138     PetscFunctionReturn(0);
3139   }
3140 
3141   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3142     if (flag & SOR_ZERO_INITIAL_GUESS) {
3143       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3144       its--;
3145     }
3146 
3147     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3148     while (its--) {
3149 
3150       /* lower triangular part: slvec0b = - B^T*xx */
3151       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3152 
3153       /* copy xx into slvec0a */
3154       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3155       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3156       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
3157       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3158 
3159       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3160 
3161       /* copy bb into slvec1a */
3162       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3163       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3164       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
3165       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3166 
3167       /* set slvec1b = 0 */
3168       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3169 
3170       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3171       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3172       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3173       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3174 
3175       /* upper triangular part: bb1 = bb1 - B*x */
3176       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3177 
3178       /* local diagonal sweep */
3179       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3180     }
3181     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3182   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3183     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3184   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3185     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3186   } else if (flag & SOR_EISENSTAT) {
3187     Vec               xx1;
3188     PetscBool         hasop;
3189     const PetscScalar *diag;
3190     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3191     PetscInt          i,n;
3192 
3193     if (!mat->xx1) {
3194       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3195       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3196     }
3197     xx1 = mat->xx1;
3198     bb1 = mat->bb1;
3199 
3200     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3201 
3202     if (!mat->diag) {
3203       /* this is wrong for same matrix with new nonzero values */
3204       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3205       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3206     }
3207     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3208 
3209     if (hasop) {
3210       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3211       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3212     } else {
3213       /*
3214           These two lines are replaced by code that may be a bit faster for a good compiler
3215       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3216       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3217       */
3218       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3219       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3220       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3221       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3222       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3223       if (omega == 1.0) {
3224         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3225         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3226       } else {
3227         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3228         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3229       }
3230       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3231       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3232       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3233       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3234     }
3235 
3236     /* multiply off-diagonal portion of matrix */
3237     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3238     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3239     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3240     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3241     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
3242     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3243     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3244     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3245     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3246     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3247 
3248     /* local sweep */
3249     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3250     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3251   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3256 {
3257   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3258   PetscErrorCode ierr;
3259   Vec            lvec1,bb1;
3260 
3261   PetscFunctionBegin;
3262   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3263   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3264 
3265   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3266     if (flag & SOR_ZERO_INITIAL_GUESS) {
3267       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3268       its--;
3269     }
3270 
3271     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3272     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3273     while (its--) {
3274       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3275 
3276       /* lower diagonal part: bb1 = bb - B^T*xx */
3277       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3278       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3279 
3280       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3281       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3282       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3283 
3284       /* upper diagonal part: bb1 = bb1 - B*x */
3285       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3286       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3287 
3288       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3289 
3290       /* diagonal sweep */
3291       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3292     }
3293     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3294     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3295   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3296   PetscFunctionReturn(0);
3297 }
3298 
3299 /*@
3300      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3301          CSR format the local rows.
3302 
3303    Collective
3304 
3305    Input Parameters:
3306 +  comm - MPI communicator
3307 .  bs - the block size, only a block size of 1 is supported
3308 .  m - number of local rows (Cannot be PETSC_DECIDE)
3309 .  n - This value should be the same as the local size used in creating the
3310        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3311        calculated if N is given) For square matrices n is almost always m.
3312 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3313 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3314 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
3315 .   j - column indices
3316 -   a - matrix values
3317 
3318    Output Parameter:
3319 .   mat - the matrix
3320 
3321    Level: intermediate
3322 
3323    Notes:
3324        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3325      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3326      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3327 
3328        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3329 
3330 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3331           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3332 @*/
3333 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3334 {
3335   PetscErrorCode ierr;
3336 
3337 
3338   PetscFunctionBegin;
3339   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3340   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3341   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3342   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3343   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3344   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3345   PetscFunctionReturn(0);
3346 }
3347 
3348 
3349 /*@C
3350    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
3351 
3352    Collective
3353 
3354    Input Parameters:
3355 +  B - the matrix
3356 .  bs - the block size
3357 .  i - the indices into j for the start of each local row (starts with zero)
3358 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3359 -  v - optional values in the matrix
3360 
3361    Level: advanced
3362 
3363    Notes:
3364    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
3365    and usually the numerical values as well
3366 
3367    Any entries below the diagonal are ignored
3368 
3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3370 @*/
3371 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3372 {
3373   PetscErrorCode ierr;
3374 
3375   PetscFunctionBegin;
3376   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3381 {
3382   PetscErrorCode ierr;
3383   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3384   PetscInt       *indx;
3385   PetscScalar    *values;
3386 
3387   PetscFunctionBegin;
3388   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3389   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3390     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3391     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3392     PetscInt       *bindx,rmax=a->rmax,j;
3393     PetscMPIInt    rank,size;
3394 
3395     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3396     mbs = m/bs; Nbs = N/cbs;
3397     if (n == PETSC_DECIDE) {
3398       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3399     }
3400     nbs = n/cbs;
3401 
3402     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3403     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3404 
3405     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3406     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3407     if (rank == size-1) {
3408       /* Check sum(nbs) = Nbs */
3409       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3410     }
3411 
3412     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3413     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3414     for (i=0; i<mbs; i++) {
3415       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3416       nnz  = nnz/bs;
3417       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3418       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3419       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3420     }
3421     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3422     ierr = PetscFree(bindx);CHKERRQ(ierr);
3423 
3424     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3425     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3426     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3427     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3428     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3429     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3430     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3431   }
3432 
3433   /* numeric phase */
3434   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3435   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3436 
3437   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3438   for (i=0; i<m; i++) {
3439     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3440     Ii   = i + rstart;
3441     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3442     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3443   }
3444   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3445   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3446   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3447   PetscFunctionReturn(0);
3448 }
3449