xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d9ca1df42eda25bc875149d6d493aad6e204c9ff)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84     A->nonzerostate++;\
85 a_noinsert:; \
86     ailen[brow] = nrow; \
87   }
88 
89 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90   { \
91     brow = row/bs;  \
92     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93     rmax = bimax[brow]; nrow = bilen[brow]; \
94     bcol = col/bs; \
95     ridx = row % bs; cidx = col % bs; \
96     low  = 0; high = nrow; \
97     while (high-low > 3) { \
98       t = (low+high)/2; \
99       if (rp[t] > bcol) high = t; \
100       else              low  = t; \
101     } \
102     for (_i=low; _i<high; _i++) { \
103       if (rp[_i] > bcol) break; \
104       if (rp[_i] == bcol) { \
105         bap = ap + bs2*_i + bs*cidx + ridx; \
106         if (addv == ADD_VALUES) *bap += value;  \
107         else                    *bap  = value;  \
108         goto b_noinsert; \
109       } \
110     } \
111     if (b->nonew == 1) goto b_noinsert; \
112     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114     N = nrow++ - 1;  \
115     /* shift up all the later entries in this row */ \
116     for (ii=N; ii>=_i; ii--) { \
117       rp[ii+1] = rp[ii]; \
118       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
119     } \
120     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
121     rp[_i]                      = bcol;  \
122     ap[bs2*_i + bs*cidx + ridx] = value;  \
123     B->nonzerostate++;\
124 b_noinsert:; \
125     bilen[brow] = nrow; \
126   }
127 
128 /* Only add/insert a(i,j) with i<=j (blocks).
129    Any a(i,j) with i>j input by user is ingored.
130 */
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSetValues_MPISBAIJ"
133 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134 {
135   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
136   MatScalar      value;
137   PetscBool      roworiented = baij->roworiented;
138   PetscErrorCode ierr;
139   PetscInt       i,j,row,col;
140   PetscInt       rstart_orig=mat->rmap->rstart;
141   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
143 
144   /* Some Variables required in the macro */
145   Mat          A     = baij->A;
146   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
147   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148   MatScalar    *aa   =a->a;
149 
150   Mat         B     = baij->B;
151   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
152   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153   MatScalar   *ba   =b->a;
154 
155   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
156   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
157   MatScalar *ap,*bap;
158 
159   /* for stash */
160   PetscInt  n_loc, *in_loc = NULL;
161   MatScalar *v_loc = NULL;
162 
163   PetscFunctionBegin;
164   if (!baij->donotstash) {
165     if (n > baij->n_loc) {
166       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
167       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
168       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
169       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
170 
171       baij->n_loc = n;
172     }
173     in_loc = baij->in_loc;
174     v_loc  = baij->v_loc;
175   }
176 
177   for (i=0; i<m; i++) {
178     if (im[i] < 0) continue;
179 #if defined(PETSC_USE_DEBUG)
180     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
181 #endif
182     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
183       row = im[i] - rstart_orig;              /* local row index */
184       for (j=0; j<n; j++) {
185         if (im[i]/bs > in[j]/bs) {
186           if (a->ignore_ltriangular) {
187             continue;    /* ignore lower triangular blocks */
188           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
189         }
190         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
191           col  = in[j] - cstart_orig;         /* local col index */
192           brow = row/bs; bcol = col/bs;
193           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
194           if (roworiented) value = v[i*n+j];
195           else             value = v[i+j*m];
196           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
197           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
198         } else if (in[j] < 0) continue;
199 #if defined(PETSC_USE_DEBUG)
200         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
201 #endif
202         else {  /* off-diag entry (B) */
203           if (mat->was_assembled) {
204             if (!baij->colmap) {
205               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
206             }
207 #if defined(PETSC_USE_CTABLE)
208             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
209             col  = col - 1;
210 #else
211             col = baij->colmap[in[j]/bs] - 1;
212 #endif
213             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
214               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
215               col  =  in[j];
216               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
217               B    = baij->B;
218               b    = (Mat_SeqBAIJ*)(B)->data;
219               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
220               ba   = b->a;
221             } else col += in[j]%bs;
222           } else col = in[j];
223           if (roworiented) value = v[i*n+j];
224           else             value = v[i+j*m];
225           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         }
228       }
229     } else {  /* off processor entry */
230       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
231       if (!baij->donotstash) {
232         mat->assembled = PETSC_FALSE;
233         n_loc          = 0;
234         for (j=0; j<n; j++) {
235           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
236           in_loc[n_loc] = in[j];
237           if (roworiented) {
238             v_loc[n_loc] = v[i*n+j];
239           } else {
240             v_loc[n_loc] = v[j*m+i];
241           }
242           n_loc++;
243         }
244         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
245       }
246     }
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
253 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
254 {
255   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
256   const MatScalar *value;
257   MatScalar       *barray     =baij->barray;
258   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
259   PetscErrorCode  ierr;
260   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
261   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
262   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
263 
264   PetscFunctionBegin;
265   if (!barray) {
266     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
267     baij->barray = barray;
268   }
269 
270   if (roworiented) {
271     stepval = (n-1)*bs;
272   } else {
273     stepval = (m-1)*bs;
274   }
275   for (i=0; i<m; i++) {
276     if (im[i] < 0) continue;
277 #if defined(PETSC_USE_DEBUG)
278     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
279 #endif
280     if (im[i] >= rstart && im[i] < rend) {
281       row = im[i] - rstart;
282       for (j=0; j<n; j++) {
283         if (im[i] > in[j]) {
284           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
285           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
286         }
287         /* If NumCol = 1 then a copy is not required */
288         if ((roworiented) && (n == 1)) {
289           barray = (MatScalar*) v + i*bs2;
290         } else if ((!roworiented) && (m == 1)) {
291           barray = (MatScalar*) v + j*bs2;
292         } else { /* Here a copy is required */
293           if (roworiented) {
294             value = v + i*(stepval+bs)*bs + j*bs;
295           } else {
296             value = v + j*(stepval+bs)*bs + i*bs;
297           }
298           for (ii=0; ii<bs; ii++,value+=stepval) {
299             for (jj=0; jj<bs; jj++) {
300               *barray++ = *value++;
301             }
302           }
303           barray -=bs2;
304         }
305 
306         if (in[j] >= cstart && in[j] < cend) {
307           col  = in[j] - cstart;
308           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
309         } else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined(PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined(PETSC_USE_CTABLE)
330             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col  = in[j];
338             }
339           } else col = in[j];
340           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
341         }
342       }
343     } else {
344       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend) {
376           col  = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined(PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data--;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
396   }
397   PetscFunctionReturn(0);
398 }
399 
400 #undef __FUNCT__
401 #define __FUNCT__ "MatNorm_MPISBAIJ"
402 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
403 {
404   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
405   PetscErrorCode ierr;
406   PetscReal      sum[2],*lnorm2;
407 
408   PetscFunctionBegin;
409   if (baij->size == 1) {
410     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
411   } else {
412     if (type == NORM_FROBENIUS) {
413       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
414       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
415       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
416       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
418       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
419       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
420       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
421     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
422       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
423       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
424       PetscReal    *rsum,*rsum2,vabs;
425       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
426       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
427       MatScalar    *v;
428 
429       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
430       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
431       /* Amat */
432       v = amat->a; jj = amat->j;
433       for (brow=0; brow<mbs; brow++) {
434         grow = bs*(rstart + brow);
435         nz   = amat->i[brow+1] - amat->i[brow];
436         for (bcol=0; bcol<nz; bcol++) {
437           gcol = bs*(rstart + *jj); jj++;
438           for (col=0; col<bs; col++) {
439             for (row=0; row<bs; row++) {
440               vabs            = PetscAbsScalar(*v); v++;
441               rsum[gcol+col] += vabs;
442               /* non-diagonal block */
443               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
444             }
445           }
446         }
447       }
448       /* Bmat */
449       v = bmat->a; jj = bmat->j;
450       for (brow=0; brow<mbs; brow++) {
451         grow = bs*(rstart + brow);
452         nz = bmat->i[brow+1] - bmat->i[brow];
453         for (bcol=0; bcol<nz; bcol++) {
454           gcol = bs*garray[*jj]; jj++;
455           for (col=0; col<bs; col++) {
456             for (row=0; row<bs; row++) {
457               vabs            = PetscAbsScalar(*v); v++;
458               rsum[gcol+col] += vabs;
459               rsum[grow+row] += vabs;
460             }
461           }
462         }
463       }
464       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
465       *norm = 0.0;
466       for (col=0; col<mat->cmap->N; col++) {
467         if (rsum2[col] > *norm) *norm = rsum2[col];
468       }
469       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
470     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
477 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
478 {
479   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
480   PetscErrorCode ierr;
481   PetscInt       nstash,reallocs;
482   InsertMode     addv;
483 
484   PetscFunctionBegin;
485   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
486 
487   /* make sure all processors are either in INSERTMODE or ADDMODE */
488   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
489   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
490   mat->insertmode = addv; /* in case this processor had no cache */
491 
492   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
493   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
494   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
495   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
496   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
497   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
503 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
504 {
505   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
506   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
507   PetscErrorCode ierr;
508   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
509   PetscInt       *row,*col;
510   PetscBool      other_disassembled;
511   PetscMPIInt    n;
512   PetscBool      r1,r2,r3;
513   MatScalar      *val;
514   InsertMode     addv = mat->insertmode;
515 
516   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
517   PetscFunctionBegin;
518   if (!baij->donotstash &&  !mat->nooffprocentries) {
519     while (1) {
520       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
521       if (!flg) break;
522 
523       for (i=0; i<n;) {
524         /* Now identify the consecutive vals belonging to the same row */
525         for (j=i,rstart=row[j]; j<n; j++) {
526           if (row[j] != rstart) break;
527         }
528         if (j < n) ncols = j-i;
529         else       ncols = n-i;
530         /* Now assemble all these values with a single function call */
531         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
532         i    = j;
533       }
534     }
535     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
536     /* Now process the block-stash. Since the values are stashed column-oriented,
537        set the roworiented flag to column oriented, and after MatSetValues()
538        restore the original flags */
539     r1 = baij->roworiented;
540     r2 = a->roworiented;
541     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
542 
543     baij->roworiented = PETSC_FALSE;
544     a->roworiented    = PETSC_FALSE;
545 
546     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
547     while (1) {
548       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
549       if (!flg) break;
550 
551       for (i=0; i<n;) {
552         /* Now identify the consecutive vals belonging to the same row */
553         for (j=i,rstart=row[j]; j<n; j++) {
554           if (row[j] != rstart) break;
555         }
556         if (j < n) ncols = j-i;
557         else       ncols = n-i;
558         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
559         i    = j;
560       }
561     }
562     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
563 
564     baij->roworiented = r1;
565     a->roworiented    = r2;
566 
567     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
568   }
569 
570   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
571   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
572 
573   /* determine if any processor has disassembled, if so we must
574      also disassemble ourselfs, in order that we may reassemble. */
575   /*
576      if nonzero structure of submatrix B cannot change then we know that
577      no processor disassembled thus we can skip this stuff
578   */
579   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
580     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
581     if (mat->was_assembled && !other_disassembled) {
582       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
583     }
584   }
585 
586   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
587     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
588   }
589   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
590   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
591 
592   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
593 
594   baij->rowvalues = 0;
595 
596   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
597   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
598     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
599     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606 #include <petscdraw.h>
607 #undef __FUNCT__
608 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
609 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610 {
611   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
612   PetscErrorCode    ierr;
613   PetscInt          bs   = mat->rmap->bs;
614   PetscMPIInt       rank = baij->rank;
615   PetscBool         iascii,isdraw;
616   PetscViewer       sviewer;
617   PetscViewerFormat format;
618 
619   PetscFunctionBegin;
620   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
621   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
622   if (iascii) {
623     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
624     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
625       MatInfo info;
626       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
627       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
634       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
635       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
636       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
637       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_INFO) {
640       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
643       PetscFunctionReturn(0);
644     }
645   }
646 
647   if (isdraw) {
648     PetscDraw draw;
649     PetscBool isnull;
650     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
651     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
652   }
653 
654   {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661     const char   *matname;
662 
663     /* Should this be the same type as mat? */
664     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
665     if (!rank) {
666       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
667     } else {
668       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
669     }
670     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
671     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
672     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
673     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
674 
675     /* copy over the A part */
676     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
677     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
678     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
679 
680     for (i=0; i<mbs; i++) {
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = (baij->cstartbs+aj[j])*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++;
688           a += bs;
689         }
690       }
691     }
692     /* copy over the B part */
693     Bloc = (Mat_SeqBAIJ*)baij->B->data;
694     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
695     for (i=0; i<mbs; i++) {
696 
697       rvals[0] = bs*(baij->rstartbs + i);
698       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
699       for (j=ai[i]; j<ai[i+1]; j++) {
700         col = baij->garray[aj[j]]*bs;
701         for (k=0; k<bs; k++) {
702           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
703           col++;
704           a += bs;
705         }
706       }
707     }
708     ierr = PetscFree(rvals);CHKERRQ(ierr);
709     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
711     /*
712        Everyone has to call to draw the matrix since the graphics waits are
713        synchronized across all processors that share the PetscDraw object
714     */
715     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
716     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
717     if (!rank) {
718       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
719       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
720     }
721     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
722     ierr = MatDestroy(&A);CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatView_MPISBAIJ"
729 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
730 {
731   PetscErrorCode ierr;
732   PetscBool      iascii,isdraw,issocket,isbinary;
733 
734   PetscFunctionBegin;
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
739   if (iascii || isdraw || issocket || isbinary) {
740     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined(PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* supperdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode    ierr;
838   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar       *from;
840   const PetscScalar *x;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* supperdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do supperdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode    ierr;
902   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar       *from,zero=0.0;
904   const PetscScalar *x;
905 
906   PetscFunctionBegin;
907   /*
908   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
909   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
910   */
911   /* diagonal part */
912   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
913   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
914 
915   /* subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
917 
918   /* copy x into the vec slvec0 */
919   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
920   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
921   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
922   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
923 
924   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
926   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927 
928   /* supperdiagonal part */
929   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do supperdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956   This only works correctly for square matrices where the subblock A->A is the
957    diagonal block
958 */
959 #undef __FUNCT__
960 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
961 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
962 {
963   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
964   PetscErrorCode ierr;
965 
966   PetscFunctionBegin;
967   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
968   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatScale_MPISBAIJ"
974 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
975 {
976   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
981   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatGetRow_MPISBAIJ"
987 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
988 {
989   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
990   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
991   PetscErrorCode ierr;
992   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
993   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
994   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
995 
996   PetscFunctionBegin;
997   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
998   mat->getrowactive = PETSC_TRUE;
999 
1000   if (!mat->rowvalues && (idx || v)) {
1001     /*
1002         allocate enough space to hold information from the longest row.
1003     */
1004     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1005     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1006     PetscInt     max = 1,mbs = mat->mbs,tmp;
1007     for (i=0; i<mbs; i++) {
1008       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1009       if (max < tmp) max = tmp;
1010     }
1011     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1012   }
1013 
1014   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1015   lrow = row - brstart;  /* local row index */
1016 
1017   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1018   if (!v)   {pvA = 0; pvB = 0;}
1019   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1020   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1021   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1022   nztot = nzA + nzB;
1023 
1024   cmap = mat->garray;
1025   if (v  || idx) {
1026     if (nztot) {
1027       /* Sort by increasing column numbers, assuming A and B already sorted */
1028       PetscInt imark = -1;
1029       if (v) {
1030         *v = v_p = mat->rowvalues;
1031         for (i=0; i<nzB; i++) {
1032           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1033           else break;
1034         }
1035         imark = i;
1036         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1037         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1038       }
1039       if (idx) {
1040         *idx = idx_p = mat->rowindices;
1041         if (imark > -1) {
1042           for (i=0; i<imark; i++) {
1043             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1044           }
1045         } else {
1046           for (i=0; i<nzB; i++) {
1047             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1048             else break;
1049           }
1050           imark = i;
1051         }
1052         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1053         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1054       }
1055     } else {
1056       if (idx) *idx = 0;
1057       if (v)   *v   = 0;
1058     }
1059   }
1060   *nz  = nztot;
1061   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1062   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1069 {
1070   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1071 
1072   PetscFunctionBegin;
1073   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1074   baij->getrowactive = PETSC_FALSE;
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1081 {
1082   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1083   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1084 
1085   PetscFunctionBegin;
1086   aA->getrow_utriangular = PETSC_TRUE;
1087   PetscFunctionReturn(0);
1088 }
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1092 {
1093   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1094   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1095 
1096   PetscFunctionBegin;
1097   aA->getrow_utriangular = PETSC_FALSE;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1104 {
1105   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1110   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1117 {
1118   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1123   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1130 {
1131   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1136   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1143 {
1144   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1145   Mat            A  = a->A,B = a->B;
1146   PetscErrorCode ierr;
1147   PetscReal      isend[5],irecv[5];
1148 
1149   PetscFunctionBegin;
1150   info->block_size = (PetscReal)matin->rmap->bs;
1151 
1152   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1153 
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156 
1157   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1158 
1159   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1160   isend[3] += info->memory;  isend[4] += info->mallocs;
1161   if (flag == MAT_LOCAL) {
1162     info->nz_used      = isend[0];
1163     info->nz_allocated = isend[1];
1164     info->nz_unneeded  = isend[2];
1165     info->memory       = isend[3];
1166     info->mallocs      = isend[4];
1167   } else if (flag == MAT_GLOBAL_MAX) {
1168     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1169 
1170     info->nz_used      = irecv[0];
1171     info->nz_allocated = irecv[1];
1172     info->nz_unneeded  = irecv[2];
1173     info->memory       = irecv[3];
1174     info->mallocs      = irecv[4];
1175   } else if (flag == MAT_GLOBAL_SUM) {
1176     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1177 
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1192 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1193 {
1194   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1195   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   switch (op) {
1200   case MAT_NEW_NONZERO_LOCATIONS:
1201   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1202   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1203   case MAT_KEEP_NONZERO_PATTERN:
1204   case MAT_NEW_NONZERO_LOCATION_ERR:
1205     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1206     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1207     break;
1208   case MAT_ROW_ORIENTED:
1209     a->roworiented = flg;
1210 
1211     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1212     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1213     break;
1214   case MAT_NEW_DIAGONALS:
1215     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1216     break;
1217   case MAT_IGNORE_OFF_PROC_ENTRIES:
1218     a->donotstash = flg;
1219     break;
1220   case MAT_USE_HASH_TABLE:
1221     a->ht_flag = flg;
1222     break;
1223   case MAT_HERMITIAN:
1224     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1225     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1226 
1227     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1228     break;
1229   case MAT_SPD:
1230     A->spd_set = PETSC_TRUE;
1231     A->spd     = flg;
1232     if (flg) {
1233       A->symmetric                  = PETSC_TRUE;
1234       A->structurally_symmetric     = PETSC_TRUE;
1235       A->symmetric_set              = PETSC_TRUE;
1236       A->structurally_symmetric_set = PETSC_TRUE;
1237     }
1238     break;
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1244     break;
1245   case MAT_SYMMETRY_ETERNAL:
1246     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1247     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1248     break;
1249   case MAT_IGNORE_LOWER_TRIANGULAR:
1250     aA->ignore_ltriangular = flg;
1251     break;
1252   case MAT_ERROR_LOWER_TRIANGULAR:
1253     aA->ignore_ltriangular = flg;
1254     break;
1255   case MAT_GETROW_UPPERTRIANGULAR:
1256     aA->getrow_utriangular = flg;
1257     break;
1258   default:
1259     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1260   }
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1266 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1267 {
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271   if (MAT_INITIAL_MATRIX || *B != A) {
1272     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1279 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1280 {
1281   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1282   Mat            a     = baij->A, b=baij->B;
1283   PetscErrorCode ierr;
1284   PetscInt       nv,m,n;
1285   PetscBool      flg;
1286 
1287   PetscFunctionBegin;
1288   if (ll != rr) {
1289     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1290     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1291   }
1292   if (!ll) PetscFunctionReturn(0);
1293 
1294   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1295   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1296 
1297   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1298   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1299 
1300   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1301 
1302   /* left diagonalscale the off-diagonal part */
1303   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1304 
1305   /* scale the diagonal part */
1306   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1307 
1308   /* right diagonalscale the off-diagonal part */
1309   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1310   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1316 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1317 {
1318   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatEqual_MPISBAIJ"
1330 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1331 {
1332   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1333   Mat            a,b,c,d;
1334   PetscBool      flg;
1335   PetscErrorCode ierr;
1336 
1337   PetscFunctionBegin;
1338   a = matA->A; b = matA->B;
1339   c = matB->A; d = matB->B;
1340 
1341   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1342   if (flg) {
1343     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1344   }
1345   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatCopy_MPISBAIJ"
1351 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1352 {
1353   PetscErrorCode ierr;
1354   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1355   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1356 
1357   PetscFunctionBegin;
1358   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1359   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1360     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1361     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1362     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1363   } else {
1364     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1365     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1372 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1373 {
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1383 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1384 {
1385   PetscErrorCode ierr;
1386   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1387   PetscBLASInt   bnz,one=1;
1388   Mat_SeqSBAIJ   *xa,*ya;
1389   Mat_SeqBAIJ    *xb,*yb;
1390 
1391   PetscFunctionBegin;
1392   if (str == SAME_NONZERO_PATTERN) {
1393     PetscScalar alpha = a;
1394     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1395     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1396     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1397     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1398     xb   = (Mat_SeqBAIJ*)xx->B->data;
1399     yb   = (Mat_SeqBAIJ*)yy->B->data;
1400     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1401     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1402     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1403   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1404     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1405     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1406     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1407   } else {
1408     Mat      B;
1409     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1410     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1411     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1412     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1413     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1414     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1415     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1416     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1417     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1418     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1419     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1420     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1421     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1422     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1423     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1424     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1425     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1426     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1427     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1428     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1435 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1436 {
1437   PetscErrorCode ierr;
1438   PetscInt       i;
1439   PetscBool      flg;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1443   for (i=0; i<n; i++) {
1444     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1445     if (!flg) { /* *B[i] is non-symmetric, set flag */
1446       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1447     }
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 /* -------------------------------------------------------------------*/
1453 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1454                                        MatGetRow_MPISBAIJ,
1455                                        MatRestoreRow_MPISBAIJ,
1456                                        MatMult_MPISBAIJ,
1457                                /*  4*/ MatMultAdd_MPISBAIJ,
1458                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1459                                        MatMultAdd_MPISBAIJ,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                /* 10*/ 0,
1464                                        0,
1465                                        0,
1466                                        MatSOR_MPISBAIJ,
1467                                        MatTranspose_MPISBAIJ,
1468                                /* 15*/ MatGetInfo_MPISBAIJ,
1469                                        MatEqual_MPISBAIJ,
1470                                        MatGetDiagonal_MPISBAIJ,
1471                                        MatDiagonalScale_MPISBAIJ,
1472                                        MatNorm_MPISBAIJ,
1473                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1474                                        MatAssemblyEnd_MPISBAIJ,
1475                                        MatSetOption_MPISBAIJ,
1476                                        MatZeroEntries_MPISBAIJ,
1477                                /* 24*/ 0,
1478                                        0,
1479                                        0,
1480                                        0,
1481                                        0,
1482                                /* 29*/ MatSetUp_MPISBAIJ,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                        0,
1487                                /* 34*/ MatDuplicate_MPISBAIJ,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                /* 39*/ MatAXPY_MPISBAIJ,
1493                                        MatGetSubMatrices_MPISBAIJ,
1494                                        MatIncreaseOverlap_MPISBAIJ,
1495                                        MatGetValues_MPISBAIJ,
1496                                        MatCopy_MPISBAIJ,
1497                                /* 44*/ 0,
1498                                        MatScale_MPISBAIJ,
1499                                        0,
1500                                        0,
1501                                        0,
1502                                /* 49*/ 0,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                /* 54*/ 0,
1508                                        0,
1509                                        MatSetUnfactored_MPISBAIJ,
1510                                        0,
1511                                        MatSetValuesBlocked_MPISBAIJ,
1512                                /* 59*/ 0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                /* 64*/ 0,
1518                                        0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1523                                        0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                /* 74*/ 0,
1528                                        0,
1529                                        0,
1530                                        0,
1531                                        0,
1532                                /* 79*/ 0,
1533                                        0,
1534                                        0,
1535                                        0,
1536                                        MatLoad_MPISBAIJ,
1537                                /* 84*/ 0,
1538                                        0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                /* 89*/ 0,
1543                                        0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                /* 94*/ 0,
1548                                        0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                /* 99*/ 0,
1553                                        0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                /*104*/ 0,
1558                                        MatRealPart_MPISBAIJ,
1559                                        MatImaginaryPart_MPISBAIJ,
1560                                        MatGetRowUpperTriangular_MPISBAIJ,
1561                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1562                                /*109*/ 0,
1563                                        0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                /*114*/ 0,
1568                                        0,
1569                                        0,
1570                                        0,
1571                                        0,
1572                                /*119*/ 0,
1573                                        0,
1574                                        0,
1575                                        0,
1576                                        0,
1577                                /*124*/ 0,
1578                                        0,
1579                                        0,
1580                                        0,
1581                                        0,
1582                                /*129*/ 0,
1583                                        0,
1584                                        0,
1585                                        0,
1586                                        0,
1587                                /*134*/ 0,
1588                                        0,
1589                                        0,
1590                                        0,
1591                                        0,
1592                                /*139*/ 0,
1593                                        0,
1594                                        0,
1595                                        0,
1596                                        0,
1597                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1598 };
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1602 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1603 {
1604   PetscFunctionBegin;
1605   *a = ((Mat_MPISBAIJ*)A->data)->A;
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1611 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1612 {
1613   Mat_MPISBAIJ   *b;
1614   PetscErrorCode ierr;
1615   PetscInt       i,mbs,Mbs;
1616 
1617   PetscFunctionBegin;
1618   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1619   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1620   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1621   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1622 
1623   b   = (Mat_MPISBAIJ*)B->data;
1624   mbs = B->rmap->n/bs;
1625   Mbs = B->rmap->N/bs;
1626   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1627 
1628   B->rmap->bs = bs;
1629   b->bs2      = bs*bs;
1630   b->mbs      = mbs;
1631   b->Mbs      = Mbs;
1632   b->nbs      = B->cmap->n/bs;
1633   b->Nbs      = B->cmap->N/bs;
1634 
1635   for (i=0; i<=b->size; i++) {
1636     b->rangebs[i] = B->rmap->range[i]/bs;
1637   }
1638   b->rstartbs = B->rmap->rstart/bs;
1639   b->rendbs   = B->rmap->rend/bs;
1640 
1641   b->cstartbs = B->cmap->rstart/bs;
1642   b->cendbs   = B->cmap->rend/bs;
1643 
1644   if (!B->preallocated) {
1645     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1646     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1647     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1648     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1649     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1651     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1652     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1653     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1654   }
1655 
1656   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1657   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1658 
1659   B->preallocated = PETSC_TRUE;
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1665 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1666 {
1667   PetscInt       m,rstart,cstart,cend;
1668   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1669   const PetscInt *JJ    =0;
1670   PetscScalar    *values=0;
1671   PetscErrorCode ierr;
1672 
1673   PetscFunctionBegin;
1674   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1675   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1676   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1677   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1678   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1679   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1680   m      = B->rmap->n/bs;
1681   rstart = B->rmap->rstart/bs;
1682   cstart = B->cmap->rstart/bs;
1683   cend   = B->cmap->rend/bs;
1684 
1685   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1686   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) {
1688     nz = ii[i+1] - ii[i];
1689     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1690     nz_max = PetscMax(nz_max,nz);
1691     JJ     = jj + ii[i];
1692     for (j=0; j<nz; j++) {
1693       if (*JJ >= cstart) break;
1694       JJ++;
1695     }
1696     d = 0;
1697     for (; j<nz; j++) {
1698       if (*JJ++ >= cend) break;
1699       d++;
1700     }
1701     d_nnz[i] = d;
1702     o_nnz[i] = nz - d;
1703   }
1704   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1705   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1706 
1707   values = (PetscScalar*)V;
1708   if (!values) {
1709     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1710     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1711   }
1712   for (i=0; i<m; i++) {
1713     PetscInt          row    = i + rstart;
1714     PetscInt          ncols  = ii[i+1] - ii[i];
1715     const PetscInt    *icols = jj + ii[i];
1716     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1717     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1718   }
1719 
1720   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1721   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 /*MC
1728    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1729    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1730    the matrix is stored.
1731 
1732   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1733   can call MatSetOption(Mat, MAT_HERMITIAN);
1734 
1735    Options Database Keys:
1736 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1737 
1738   Level: beginner
1739 
1740 .seealso: MatCreateMPISBAIJ
1741 M*/
1742 
1743 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatCreate_MPISBAIJ"
1747 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1748 {
1749   Mat_MPISBAIJ   *b;
1750   PetscErrorCode ierr;
1751   PetscBool      flg = PETSC_FALSE;
1752 
1753   PetscFunctionBegin;
1754   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1755   B->data = (void*)b;
1756   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1757 
1758   B->ops->destroy = MatDestroy_MPISBAIJ;
1759   B->ops->view    = MatView_MPISBAIJ;
1760   B->assembled    = PETSC_FALSE;
1761   B->insertmode   = NOT_SET_VALUES;
1762 
1763   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1764   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1765 
1766   /* build local table of row and column ownerships */
1767   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1768 
1769   /* build cache for off array entries formed */
1770   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1771 
1772   b->donotstash  = PETSC_FALSE;
1773   b->colmap      = NULL;
1774   b->garray      = NULL;
1775   b->roworiented = PETSC_TRUE;
1776 
1777   /* stuff used in block assembly */
1778   b->barray = 0;
1779 
1780   /* stuff used for matrix vector multiply */
1781   b->lvec    = 0;
1782   b->Mvctx   = 0;
1783   b->slvec0  = 0;
1784   b->slvec0b = 0;
1785   b->slvec1  = 0;
1786   b->slvec1a = 0;
1787   b->slvec1b = 0;
1788   b->sMvctx  = 0;
1789 
1790   /* stuff for MatGetRow() */
1791   b->rowindices   = 0;
1792   b->rowvalues    = 0;
1793   b->getrowactive = PETSC_FALSE;
1794 
1795   /* hash table stuff */
1796   b->ht           = 0;
1797   b->hd           = 0;
1798   b->ht_size      = 0;
1799   b->ht_flag      = PETSC_FALSE;
1800   b->ht_fact      = 0;
1801   b->ht_total_ct  = 0;
1802   b->ht_insert_ct = 0;
1803 
1804   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1805   b->ijonly = PETSC_FALSE;
1806 
1807   b->in_loc = 0;
1808   b->v_loc  = 0;
1809   b->n_loc  = 0;
1810 
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1817 
1818   B->symmetric                  = PETSC_TRUE;
1819   B->structurally_symmetric     = PETSC_TRUE;
1820   B->symmetric_set              = PETSC_TRUE;
1821   B->structurally_symmetric_set = PETSC_TRUE;
1822 
1823   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1824   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1825   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1826   if (flg) {
1827     PetscReal fact = 1.39;
1828     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1829     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1830     if (fact <= 1.0) fact = 1.39;
1831     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1832     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1833   }
1834   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 /*MC
1839    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1840 
1841    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1842    and MATMPISBAIJ otherwise.
1843 
1844    Options Database Keys:
1845 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1846 
1847   Level: beginner
1848 
1849 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1850 M*/
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1854 /*@C
1855    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1856    the user should preallocate the matrix storage by setting the parameters
1857    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1858    performance can be increased by more than a factor of 50.
1859 
1860    Collective on Mat
1861 
1862    Input Parameters:
1863 +  B - the matrix
1864 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1865           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1866 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1867            submatrix  (same for all local rows)
1868 .  d_nnz - array containing the number of block nonzeros in the various block rows
1869            in the upper triangular and diagonal part of the in diagonal portion of the local
1870            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1871            for the diagonal entry and set a value even if it is zero.
1872 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1873            submatrix (same for all local rows).
1874 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1875            off-diagonal portion of the local submatrix that is right of the diagonal
1876            (possibly different for each block row) or NULL.
1877 
1878 
1879    Options Database Keys:
1880 .   -mat_no_unroll - uses code that does not unroll the loops in the
1881                      block calculations (much slower)
1882 .   -mat_block_size - size of the blocks to use
1883 
1884    Notes:
1885 
1886    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1887    than it must be used on all processors that share the object for that argument.
1888 
1889    If the *_nnz parameter is given then the *_nz parameter is ignored
1890 
1891    Storage Information:
1892    For a square global matrix we define each processor's diagonal portion
1893    to be its local rows and the corresponding columns (a square submatrix);
1894    each processor's off-diagonal portion encompasses the remainder of the
1895    local matrix (a rectangular submatrix).
1896 
1897    The user can specify preallocated storage for the diagonal part of
1898    the local submatrix with either d_nz or d_nnz (not both).  Set
1899    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1900    memory allocation.  Likewise, specify preallocated storage for the
1901    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1902 
1903    You can call MatGetInfo() to get information on how effective the preallocation was;
1904    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1905    You can also run with the option -info and look for messages with the string
1906    malloc in them to see if additional memory allocation was needed.
1907 
1908    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1909    the figure below we depict these three local rows and all columns (0-11).
1910 
1911 .vb
1912            0 1 2 3 4 5 6 7 8 9 10 11
1913           --------------------------
1914    row 3  |. . . d d d o o o o  o  o
1915    row 4  |. . . d d d o o o o  o  o
1916    row 5  |. . . d d d o o o o  o  o
1917           --------------------------
1918 .ve
1919 
1920    Thus, any entries in the d locations are stored in the d (diagonal)
1921    submatrix, and any entries in the o locations are stored in the
1922    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1923    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1924 
1925    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1926    plus the diagonal part of the d matrix,
1927    and o_nz should indicate the number of block nonzeros per row in the o matrix
1928 
1929    In general, for PDE problems in which most nonzeros are near the diagonal,
1930    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1931    or you will get TERRIBLE performance; see the users' manual chapter on
1932    matrices.
1933 
1934    Level: intermediate
1935 
1936 .keywords: matrix, block, aij, compressed row, sparse, parallel
1937 
1938 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1939 @*/
1940 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1941 {
1942   PetscErrorCode ierr;
1943 
1944   PetscFunctionBegin;
1945   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1946   PetscValidType(B,1);
1947   PetscValidLogicalCollectiveInt(B,bs,2);
1948   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatCreateSBAIJ"
1954 /*@C
1955    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1956    (block compressed row).  For good matrix assembly performance
1957    the user should preallocate the matrix storage by setting the parameters
1958    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1959    performance can be increased by more than a factor of 50.
1960 
1961    Collective on MPI_Comm
1962 
1963    Input Parameters:
1964 +  comm - MPI communicator
1965 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1966           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1967 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1968            This value should be the same as the local size used in creating the
1969            y vector for the matrix-vector product y = Ax.
1970 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1971            This value should be the same as the local size used in creating the
1972            x vector for the matrix-vector product y = Ax.
1973 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1974 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1975 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1976            submatrix  (same for all local rows)
1977 .  d_nnz - array containing the number of block nonzeros in the various block rows
1978            in the upper triangular portion of the in diagonal portion of the local
1979            (possibly different for each block block row) or NULL.
1980            If you plan to factor the matrix you must leave room for the diagonal entry and
1981            set its value even if it is zero.
1982 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1983            submatrix (same for all local rows).
1984 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1985            off-diagonal portion of the local submatrix (possibly different for
1986            each block row) or NULL.
1987 
1988    Output Parameter:
1989 .  A - the matrix
1990 
1991    Options Database Keys:
1992 .   -mat_no_unroll - uses code that does not unroll the loops in the
1993                      block calculations (much slower)
1994 .   -mat_block_size - size of the blocks to use
1995 .   -mat_mpi - use the parallel matrix data structures even on one processor
1996                (defaults to using SeqBAIJ format on one processor)
1997 
1998    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1999    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2000    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2001 
2002    Notes:
2003    The number of rows and columns must be divisible by blocksize.
2004    This matrix type does not support complex Hermitian operation.
2005 
2006    The user MUST specify either the local or global matrix dimensions
2007    (possibly both).
2008 
2009    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2010    than it must be used on all processors that share the object for that argument.
2011 
2012    If the *_nnz parameter is given then the *_nz parameter is ignored
2013 
2014    Storage Information:
2015    For a square global matrix we define each processor's diagonal portion
2016    to be its local rows and the corresponding columns (a square submatrix);
2017    each processor's off-diagonal portion encompasses the remainder of the
2018    local matrix (a rectangular submatrix).
2019 
2020    The user can specify preallocated storage for the diagonal part of
2021    the local submatrix with either d_nz or d_nnz (not both).  Set
2022    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2023    memory allocation.  Likewise, specify preallocated storage for the
2024    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2025 
2026    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2027    the figure below we depict these three local rows and all columns (0-11).
2028 
2029 .vb
2030            0 1 2 3 4 5 6 7 8 9 10 11
2031           --------------------------
2032    row 3  |. . . d d d o o o o  o  o
2033    row 4  |. . . d d d o o o o  o  o
2034    row 5  |. . . d d d o o o o  o  o
2035           --------------------------
2036 .ve
2037 
2038    Thus, any entries in the d locations are stored in the d (diagonal)
2039    submatrix, and any entries in the o locations are stored in the
2040    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2041    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2042 
2043    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2044    plus the diagonal part of the d matrix,
2045    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2046    In general, for PDE problems in which most nonzeros are near the diagonal,
2047    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2048    or you will get TERRIBLE performance; see the users' manual chapter on
2049    matrices.
2050 
2051    Level: intermediate
2052 
2053 .keywords: matrix, block, aij, compressed row, sparse, parallel
2054 
2055 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2056 @*/
2057 
2058 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2059 {
2060   PetscErrorCode ierr;
2061   PetscMPIInt    size;
2062 
2063   PetscFunctionBegin;
2064   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2065   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2066   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2067   if (size > 1) {
2068     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2069     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2070   } else {
2071     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2072     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2080 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2081 {
2082   Mat            mat;
2083   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2084   PetscErrorCode ierr;
2085   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2086   PetscScalar    *array;
2087 
2088   PetscFunctionBegin;
2089   *newmat = 0;
2090 
2091   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2092   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2093   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2094   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2095   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2096   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2097 
2098   mat->factortype   = matin->factortype;
2099   mat->preallocated = PETSC_TRUE;
2100   mat->assembled    = PETSC_TRUE;
2101   mat->insertmode   = NOT_SET_VALUES;
2102 
2103   a      = (Mat_MPISBAIJ*)mat->data;
2104   a->bs2 = oldmat->bs2;
2105   a->mbs = oldmat->mbs;
2106   a->nbs = oldmat->nbs;
2107   a->Mbs = oldmat->Mbs;
2108   a->Nbs = oldmat->Nbs;
2109 
2110 
2111   a->size         = oldmat->size;
2112   a->rank         = oldmat->rank;
2113   a->donotstash   = oldmat->donotstash;
2114   a->roworiented  = oldmat->roworiented;
2115   a->rowindices   = 0;
2116   a->rowvalues    = 0;
2117   a->getrowactive = PETSC_FALSE;
2118   a->barray       = 0;
2119   a->rstartbs     = oldmat->rstartbs;
2120   a->rendbs       = oldmat->rendbs;
2121   a->cstartbs     = oldmat->cstartbs;
2122   a->cendbs       = oldmat->cendbs;
2123 
2124   /* hash table stuff */
2125   a->ht           = 0;
2126   a->hd           = 0;
2127   a->ht_size      = 0;
2128   a->ht_flag      = oldmat->ht_flag;
2129   a->ht_fact      = oldmat->ht_fact;
2130   a->ht_total_ct  = 0;
2131   a->ht_insert_ct = 0;
2132 
2133   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2134   if (oldmat->colmap) {
2135 #if defined(PETSC_USE_CTABLE)
2136     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2137 #else
2138     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2139     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2140     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2141 #endif
2142   } else a->colmap = 0;
2143 
2144   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2145     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2146     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2147     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2148   } else a->garray = 0;
2149 
2150   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2151   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2153   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2155 
2156   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2158   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2160 
2161   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2162   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2163   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2164   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2165   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2166   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2167   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2168   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2169   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2170   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2171   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2172   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2173   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2174 
2175   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2176   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2177   a->sMvctx = oldmat->sMvctx;
2178   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2179 
2180   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2181   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2182   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2183   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2184   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2185   *newmat = mat;
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatLoad_MPISBAIJ"
2191 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2192 {
2193   PetscErrorCode ierr;
2194   PetscInt       i,nz,j,rstart,rend;
2195   PetscScalar    *vals,*buf;
2196   MPI_Comm       comm;
2197   MPI_Status     status;
2198   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2199   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2200   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2201   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2202   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2203   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2204   int            fd;
2205 
2206   PetscFunctionBegin;
2207   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2208   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2209   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2210   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2211   if (bs < 0) bs = 1;
2212 
2213   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2214   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2215   if (!rank) {
2216     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2217     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2218     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2219     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2220   }
2221 
2222   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2223 
2224   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2225   M    = header[1];
2226   N    = header[2];
2227 
2228   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2229   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2230   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2231 
2232   /* If global sizes are set, check if they are consistent with that given in the file */
2233   if (sizesset) {
2234     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2235   }
2236   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2237   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2238 
2239   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2240 
2241   /*
2242      This code adds extra rows to make sure the number of rows is
2243      divisible by the blocksize
2244   */
2245   Mbs        = M/bs;
2246   extra_rows = bs - M + bs*(Mbs);
2247   if (extra_rows == bs) extra_rows = 0;
2248   else                  Mbs++;
2249   if (extra_rows &&!rank) {
2250     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2251   }
2252 
2253   /* determine ownership of all rows */
2254   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2255     mbs = Mbs/size + ((Mbs % size) > rank);
2256     m   = mbs*bs;
2257   } else { /* User Set */
2258     m   = newmat->rmap->n;
2259     mbs = m/bs;
2260   }
2261   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2262   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2263   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2264   rowners[0] = 0;
2265   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2266   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2267   rstart = rowners[rank];
2268   rend   = rowners[rank+1];
2269 
2270   /* distribute row lengths to all processors */
2271   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2272   if (!rank) {
2273     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2274     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2275     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2276     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2277     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2278     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2279     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2280   } else {
2281     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2282   }
2283 
2284   if (!rank) {   /* procs[0] */
2285     /* calculate the number of nonzeros on each processor */
2286     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2287     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2288     for (i=0; i<size; i++) {
2289       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2290         procsnz[i] += rowlengths[j];
2291       }
2292     }
2293     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2294 
2295     /* determine max buffer needed and allocate it */
2296     maxnz = 0;
2297     for (i=0; i<size; i++) {
2298       maxnz = PetscMax(maxnz,procsnz[i]);
2299     }
2300     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2301 
2302     /* read in my part of the matrix column indices  */
2303     nz     = procsnz[0];
2304     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2305     mycols = ibuf;
2306     if (size == 1) nz -= extra_rows;
2307     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2308     if (size == 1) {
2309       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2310     }
2311 
2312     /* read in every ones (except the last) and ship off */
2313     for (i=1; i<size-1; i++) {
2314       nz   = procsnz[i];
2315       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2316       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2317     }
2318     /* read in the stuff for the last proc */
2319     if (size != 1) {
2320       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2321       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2322       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2323       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2324     }
2325     ierr = PetscFree(cols);CHKERRQ(ierr);
2326   } else {  /* procs[i], i>0 */
2327     /* determine buffer space needed for message */
2328     nz = 0;
2329     for (i=0; i<m; i++) nz += locrowlens[i];
2330     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2331     mycols = ibuf;
2332     /* receive message of column indices*/
2333     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2334     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2335     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2336   }
2337 
2338   /* loop over local rows, determining number of off diagonal entries */
2339   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2340   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2341   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2342   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2343   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2344   rowcount = 0;
2345   nzcount  = 0;
2346   for (i=0; i<mbs; i++) {
2347     dcount  = 0;
2348     odcount = 0;
2349     for (j=0; j<bs; j++) {
2350       kmax = locrowlens[rowcount];
2351       for (k=0; k<kmax; k++) {
2352         tmp = mycols[nzcount++]/bs; /* block col. index */
2353         if (!mask[tmp]) {
2354           mask[tmp] = 1;
2355           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2356           else masked1[dcount++] = tmp; /* entry in diag portion */
2357         }
2358       }
2359       rowcount++;
2360     }
2361 
2362     dlens[i]  = dcount;  /* d_nzz[i] */
2363     odlens[i] = odcount; /* o_nzz[i] */
2364 
2365     /* zero out the mask elements we set */
2366     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2367     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2368   }
2369   if (!sizesset) {
2370     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2371   }
2372   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2373   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2374 
2375   if (!rank) {
2376     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2377     /* read in my part of the matrix numerical values  */
2378     nz     = procsnz[0];
2379     vals   = buf;
2380     mycols = ibuf;
2381     if (size == 1) nz -= extra_rows;
2382     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2383     if (size == 1) {
2384       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2385     }
2386 
2387     /* insert into matrix */
2388     jj = rstart*bs;
2389     for (i=0; i<m; i++) {
2390       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2391       mycols += locrowlens[i];
2392       vals   += locrowlens[i];
2393       jj++;
2394     }
2395 
2396     /* read in other processors (except the last one) and ship out */
2397     for (i=1; i<size-1; i++) {
2398       nz   = procsnz[i];
2399       vals = buf;
2400       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2401       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2402     }
2403     /* the last proc */
2404     if (size != 1) {
2405       nz   = procsnz[i] - extra_rows;
2406       vals = buf;
2407       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2408       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2409       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2410     }
2411     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2412 
2413   } else {
2414     /* receive numeric values */
2415     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2416 
2417     /* receive message of values*/
2418     vals   = buf;
2419     mycols = ibuf;
2420     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2421     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2422     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2423 
2424     /* insert into matrix */
2425     jj = rstart*bs;
2426     for (i=0; i<m; i++) {
2427       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2428       mycols += locrowlens[i];
2429       vals   += locrowlens[i];
2430       jj++;
2431     }
2432   }
2433 
2434   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2435   ierr = PetscFree(buf);CHKERRQ(ierr);
2436   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2437   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2438   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2439   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2440   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2447 /*XXXXX@
2448    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2449 
2450    Input Parameters:
2451 .  mat  - the matrix
2452 .  fact - factor
2453 
2454    Not Collective on Mat, each process can have a different hash factor
2455 
2456    Level: advanced
2457 
2458   Notes:
2459    This can also be set by the command line option: -mat_use_hash_table fact
2460 
2461 .keywords: matrix, hashtable, factor, HT
2462 
2463 .seealso: MatSetOption()
2464 @XXXXX*/
2465 
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2469 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2470 {
2471   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2472   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2473   PetscReal      atmp;
2474   PetscReal      *work,*svalues,*rvalues;
2475   PetscErrorCode ierr;
2476   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2477   PetscMPIInt    rank,size;
2478   PetscInt       *rowners_bs,dest,count,source;
2479   PetscScalar    *va;
2480   MatScalar      *ba;
2481   MPI_Status     stat;
2482 
2483   PetscFunctionBegin;
2484   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2485   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2486   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2487 
2488   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2489   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2490 
2491   bs  = A->rmap->bs;
2492   mbs = a->mbs;
2493   Mbs = a->Mbs;
2494   ba  = b->a;
2495   bi  = b->i;
2496   bj  = b->j;
2497 
2498   /* find ownerships */
2499   rowners_bs = A->rmap->range;
2500 
2501   /* each proc creates an array to be distributed */
2502   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2503   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2504 
2505   /* row_max for B */
2506   if (rank != size-1) {
2507     for (i=0; i<mbs; i++) {
2508       ncols = bi[1] - bi[0]; bi++;
2509       brow  = bs*i;
2510       for (j=0; j<ncols; j++) {
2511         bcol = bs*(*bj);
2512         for (kcol=0; kcol<bs; kcol++) {
2513           col  = bcol + kcol;                /* local col index */
2514           col += rowners_bs[rank+1];      /* global col index */
2515           for (krow=0; krow<bs; krow++) {
2516             atmp = PetscAbsScalar(*ba); ba++;
2517             row  = brow + krow;   /* local row index */
2518             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2519             if (work[col] < atmp) work[col] = atmp;
2520           }
2521         }
2522         bj++;
2523       }
2524     }
2525 
2526     /* send values to its owners */
2527     for (dest=rank+1; dest<size; dest++) {
2528       svalues = work + rowners_bs[dest];
2529       count   = rowners_bs[dest+1]-rowners_bs[dest];
2530       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2531     }
2532   }
2533 
2534   /* receive values */
2535   if (rank) {
2536     rvalues = work;
2537     count   = rowners_bs[rank+1]-rowners_bs[rank];
2538     for (source=0; source<rank; source++) {
2539       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2540       /* process values */
2541       for (i=0; i<count; i++) {
2542         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2543       }
2544     }
2545   }
2546 
2547   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2548   ierr = PetscFree(work);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 #undef __FUNCT__
2553 #define __FUNCT__ "MatSOR_MPISBAIJ"
2554 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2555 {
2556   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2557   PetscErrorCode    ierr;
2558   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2559   PetscScalar       *x,*ptr,*from;
2560   Vec               bb1;
2561   const PetscScalar *b;
2562 
2563   PetscFunctionBegin;
2564   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2565   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2566 
2567   if (flag == SOR_APPLY_UPPER) {
2568     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2569     PetscFunctionReturn(0);
2570   }
2571 
2572   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2573     if (flag & SOR_ZERO_INITIAL_GUESS) {
2574       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2575       its--;
2576     }
2577 
2578     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2579     while (its--) {
2580 
2581       /* lower triangular part: slvec0b = - B^T*xx */
2582       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2583 
2584       /* copy xx into slvec0a */
2585       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2586       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2587       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2588       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2589 
2590       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2591 
2592       /* copy bb into slvec1a */
2593       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2594       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2595       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2596       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2597 
2598       /* set slvec1b = 0 */
2599       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2600 
2601       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2602       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2603       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2604       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2605 
2606       /* upper triangular part: bb1 = bb1 - B*x */
2607       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2608 
2609       /* local diagonal sweep */
2610       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2611     }
2612     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2613   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2614     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2615   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2617   } else if (flag & SOR_EISENSTAT) {
2618     Vec               xx1;
2619     PetscBool         hasop;
2620     const PetscScalar *diag;
2621     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2622     PetscInt          i,n;
2623 
2624     if (!mat->xx1) {
2625       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2626       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2627     }
2628     xx1 = mat->xx1;
2629     bb1 = mat->bb1;
2630 
2631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2632 
2633     if (!mat->diag) {
2634       /* this is wrong for same matrix with new nonzero values */
2635       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2636       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2637     }
2638     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2639 
2640     if (hasop) {
2641       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2642       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2643     } else {
2644       /*
2645           These two lines are replaced by code that may be a bit faster for a good compiler
2646       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2647       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2648       */
2649       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2650       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2651       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2652       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2653       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2654       if (omega == 1.0) {
2655         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2656         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2657       } else {
2658         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2659         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2660       }
2661       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2662       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2663       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2664       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2665     }
2666 
2667     /* multiply off-diagonal portion of matrix */
2668     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2669     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2670     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2671     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2672     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2673     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2674     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2675     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2676     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2677     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2678 
2679     /* local sweep */
2680     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2681     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2682   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 #undef __FUNCT__
2687 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2688 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2689 {
2690   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2691   PetscErrorCode ierr;
2692   Vec            lvec1,bb1;
2693 
2694   PetscFunctionBegin;
2695   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2696   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2697 
2698   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2699     if (flag & SOR_ZERO_INITIAL_GUESS) {
2700       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2701       its--;
2702     }
2703 
2704     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2705     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2706     while (its--) {
2707       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2708 
2709       /* lower diagonal part: bb1 = bb - B^T*xx */
2710       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2711       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2712 
2713       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2715       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2716 
2717       /* upper diagonal part: bb1 = bb1 - B*x */
2718       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2719       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2720 
2721       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2722 
2723       /* diagonal sweep */
2724       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2725     }
2726     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2727     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2728   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2734 /*@
2735      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2736          CSR format the local rows.
2737 
2738    Collective on MPI_Comm
2739 
2740    Input Parameters:
2741 +  comm - MPI communicator
2742 .  bs - the block size, only a block size of 1 is supported
2743 .  m - number of local rows (Cannot be PETSC_DECIDE)
2744 .  n - This value should be the same as the local size used in creating the
2745        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2746        calculated if N is given) For square matrices n is almost always m.
2747 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2748 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2749 .   i - row indices
2750 .   j - column indices
2751 -   a - matrix values
2752 
2753    Output Parameter:
2754 .   mat - the matrix
2755 
2756    Level: intermediate
2757 
2758    Notes:
2759        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2760      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2761      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2762 
2763        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2764 
2765 .keywords: matrix, aij, compressed row, sparse, parallel
2766 
2767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2768           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2769 @*/
2770 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2771 {
2772   PetscErrorCode ierr;
2773 
2774 
2775   PetscFunctionBegin;
2776   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2777   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2778   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2779   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2780   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2781   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2788 /*@C
2789    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2790    (the default parallel PETSc format).
2791 
2792    Collective on MPI_Comm
2793 
2794    Input Parameters:
2795 +  B - the matrix
2796 .  bs - the block size
2797 .  i - the indices into j for the start of each local row (starts with zero)
2798 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2799 -  v - optional values in the matrix
2800 
2801    Level: developer
2802 
2803 .keywords: matrix, aij, compressed row, sparse, parallel
2804 
2805 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2806 @*/
2807 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2808 {
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2818 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2819 {
2820   PetscErrorCode ierr;
2821   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2822   PetscInt       *indx;
2823   PetscScalar    *values;
2824 
2825   PetscFunctionBegin;
2826   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2827   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2828     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2829     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2830     PetscInt       *bindx,rmax=a->rmax,j;
2831 
2832     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2833     mbs = m/bs; Nbs = N/cbs;
2834     if (n == PETSC_DECIDE) {
2835       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2836     }
2837     /* Check sum(n) = Nbs */
2838     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2839     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
2840 
2841     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2842     rstart -= mbs;
2843 
2844     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2845     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2846     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2847     for (i=0; i<mbs; i++) {
2848       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2849       nnz = nnz/bs;
2850       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2851       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2852       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2853     }
2854     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2855     ierr = PetscFree(bindx);CHKERRQ(ierr);
2856 
2857     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2858     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2859     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2860     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2861     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2862     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2863   }
2864 
2865   /* numeric phase */
2866   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2867   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2868 
2869   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2870   for (i=0; i<m; i++) {
2871     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2872     Ii   = i + rstart;
2873     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2874     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2875   }
2876   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2877   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2878   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881