xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2adc2576a28085d58fb8abb8bfa40b01341acf64)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
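/*
   The two macros below insert (or add) one scalar into the local diagonal
   block A (SeqSBAIJ) or off-diagonal block B (SeqBAIJ).  Each locates the
   target block column with a binary search that narrows the candidate range
   to at most three entries, finishes with a linear scan, and, if the block
   column is not yet present, reallocates if necessary and shifts the later
   entries of the block row up one slot to make room for the new block.
*/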
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
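/*
   Example (a sketch of caller usage, with hypothetical indices): for bs=2,
   global entry (3,1) lies in block (1,0), below the block diagonal, so it is
   silently dropped once MAT_IGNORE_LOWER_TRIANGULAR is set, while entry (1,3),
   in block (0,1), is stored:

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     row  = 1; col = 3; val = -1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
*/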
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required in the macros */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
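  /* in_loc/v_loc buffer the upper-triangular subset of each off-process row
     before it is handed to the stash */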
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
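  /* In the row-oriented case v is an (m*bs) x (n*bs) dense array: within one
     bs x bs block, consecutive rows are n*bs scalars apart, bs of which are
     consumed by the copy itself, so stepval = (n-1)*bs skips the remainder.
     The column-oriented case is the same with m in place of n. */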
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If n == 1 (row-oriented) or m == 1 (column-oriented), v already points at a contiguous bs2 block, so no copy is required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++  = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend){
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         }
313         else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined (PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined (PETSC_USE_CTABLE)
334 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
340               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col =  in[j];
342             }
343           }
344           else col = in[j];
345           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346         }
347       }
348     } else {
349       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data--;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
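      /* Only the upper triangle is stored: the SeqSBAIJ norm of A already
         accounts for the mirrored entries within the diagonal block, while
         each entry of B also stands for an unstored transpose entry, hence
         ||mat||_F = sqrt(sum ||A_p||_F^2 + 2*sum ||B_p||_F^2) over processes p */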
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
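      /* Only the upper triangle is stored, so each block contributes to the
         column sums of its own columns and, by symmetry, to the column sums
         its unstored transpose would occupy; for a symmetric matrix the
         1-norm and infinity-norm coincide, so one set of sums suffices */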
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* off-diagonal block: by symmetry it also contributes to the row sum */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash || mat->nooffprocentries) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
515 {
516   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
517   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
518   PetscErrorCode ierr;
519   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
520   PetscInt       *row,*col;
521   PetscBool      other_disassembled;
522   PetscMPIInt    n;
523   PetscBool      r1,r2,r3;
524   MatScalar      *val;
525   InsertMode     addv = mat->insertmode;
526 
527   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
528   PetscFunctionBegin;
529 
530   if (!baij->donotstash &&  !mat->nooffprocentries) {
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
538         if (j < n) ncols = j-i;
539         else       ncols = n-i;
540         /* Now assemble all these values with a single function call */
541         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
542         i = j;
543       }
544     }
545     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
546     /* Now process the block-stash. Since the values are stashed column-oriented,
547        temporarily clear the roworiented flags, and restore the original flags
548        after the MatSetValuesBlocked() calls */
549     r1 = baij->roworiented;
550     r2 = a->roworiented;
551     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
552     baij->roworiented = PETSC_FALSE;
553     a->roworiented    = PETSC_FALSE;
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
555     while (1) {
556       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
557       if (!flg) break;
558 
559       for (i=0; i<n;) {
560         /* Now identify the consecutive vals belonging to the same row */
561         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
562         if (j < n) ncols = j-i;
563         else       ncols = n-i;
564         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
565         i = j;
566       }
567     }
568     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
569     baij->roworiented = r1;
570     a->roworiented    = r2;
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled; if so we must
578      also disassemble ourselves, so that we may reassemble. */
579   /*
580      if nonzero structure of submatrix B cannot change then we know that
581      no processor disassembled thus we can skip this stuff
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598   baij->rowvalues = 0;
599 
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw  draw;
646     PetscBool  isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
676     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++; a += bs;
687         }
688       }
689     }
690     /* copy over the B part */
691     Bloc = (Mat_SeqBAIJ*)baij->B->data;
692     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
693     for (i=0; i<mbs; i++) {
694 
695       rvals[0] = bs*(baij->rstartbs + i);
696       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
697       for (j=ai[i]; j<ai[i+1]; j++) {
698         col = baij->garray[aj[j]]*bs;
699         for (k=0; k<bs; k++) {
700           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
701           col++; a += bs;
702         }
703       }
704     }
705     ierr = PetscFree(rvals);CHKERRQ(ierr);
706     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     /*
709        Everyone has to call MatView() to draw the matrix since the graphics waits
710        are synchronized across all processors that share the PetscDraw object
711     */
712     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
713     if (!rank) {
714       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
715       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
716       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
717       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
718     }
719     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
720     ierr = MatDestroy(&A);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPISBAIJ"
727 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
728 {
729   PetscErrorCode ierr;
730   PetscBool      iascii,isdraw,issocket,isbinary;
731 
732   PetscFunctionBegin;
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
735   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
736   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
737   if (iascii || isdraw || issocket || isbinary) {
738     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
739   } else {
740     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined (PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
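/*
   y = mat*x with symmetric storage uses a single scatter: slvec0 is packed as
   [x_local ; B^T x_local] and the forward scatter on sMvctx with ADD_VALUES
   simultaneously (i) delivers the B^T x_local (subdiagonal) contributions to
   the processes that own those rows, accumulating into slvec1a, which already
   holds A*x_local, and (ii) gathers into slvec1b the ghost entries of x needed
   for the superdiagonal part, which is added last as y = B*slvec1b + slvec1a.
   The Hermitian variant below uses B^H in place of B^T.
*/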
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* superdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do superdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do superdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
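  /* garray maps local block columns of B to global block column indices */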
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set                         = PETSC_TRUE;
1227     A->spd                             = flg;
1228     if (flg) {
1229       A->symmetric                     = PETSC_TRUE;
1230       A->structurally_symmetric        = PETSC_TRUE;
1231       A->symmetric_set                 = PETSC_TRUE;
1232       A->structurally_symmetric_set    = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr) {
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1367 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   if (A->rmap->bs < 0) {
1373     ierr = PetscLayoutSetBlockSize(A->rmap,1);CHKERRQ(ierr);
1374     ierr = PetscLayoutSetBlockSize(A->cmap,1);CHKERRQ(ierr);
1375   }
1376   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1382 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1383 {
1384   PetscErrorCode ierr;
1385   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1386   PetscBLASInt   bnz,one=1;
1387   Mat_SeqSBAIJ   *xa,*ya;
1388   Mat_SeqBAIJ    *xb,*yb;
1389 
1390   PetscFunctionBegin;
1391   if (str == SAME_NONZERO_PATTERN) {
1392     PetscScalar alpha = a;
1393     xa = (Mat_SeqSBAIJ *)xx->A->data;
1394     ya = (Mat_SeqSBAIJ *)yy->A->data;
1395     bnz = PetscBLASIntCast(xa->nz);
1396     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1397     xb = (Mat_SeqBAIJ *)xx->B->data;
1398     yb = (Mat_SeqBAIJ *)yy->B->data;
1399     bnz = PetscBLASIntCast(xb->nz);
1400     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1401   } else {
1402     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1403     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1404     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1411 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1412 {
1413   PetscErrorCode ierr;
1414   PetscInt       i;
1415   PetscBool      flg;
1416 
1417   PetscFunctionBegin;
1418   for (i=0; i<n; i++) {
1419     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1420     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1421   }
1422   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 
1427 /* -------------------------------------------------------------------*/
1428 static struct _MatOps MatOps_Values = {
1429        MatSetValues_MPISBAIJ,
1430        MatGetRow_MPISBAIJ,
1431        MatRestoreRow_MPISBAIJ,
1432        MatMult_MPISBAIJ,
1433 /* 4*/ MatMultAdd_MPISBAIJ,
1434        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1435        MatMultAdd_MPISBAIJ,
1436        0,
1437        0,
1438        0,
1439 /*10*/ 0,
1440        0,
1441        0,
1442        MatSOR_MPISBAIJ,
1443        MatTranspose_MPISBAIJ,
1444 /*15*/ MatGetInfo_MPISBAIJ,
1445        MatEqual_MPISBAIJ,
1446        MatGetDiagonal_MPISBAIJ,
1447        MatDiagonalScale_MPISBAIJ,
1448        MatNorm_MPISBAIJ,
1449 /*20*/ MatAssemblyBegin_MPISBAIJ,
1450        MatAssemblyEnd_MPISBAIJ,
1451        MatSetOption_MPISBAIJ,
1452        MatZeroEntries_MPISBAIJ,
1453 /*24*/ 0,
1454        0,
1455        0,
1456        0,
1457        0,
1458 /*29*/ MatSetUp_MPISBAIJ,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*34*/ MatDuplicate_MPISBAIJ,
1464        0,
1465        0,
1466        0,
1467        0,
1468 /*39*/ MatAXPY_MPISBAIJ,
1469        MatGetSubMatrices_MPISBAIJ,
1470        MatIncreaseOverlap_MPISBAIJ,
1471        MatGetValues_MPISBAIJ,
1472        MatCopy_MPISBAIJ,
1473 /*44*/ 0,
1474        MatScale_MPISBAIJ,
1475        0,
1476        0,
1477        0,
1478 /*49*/ 0,
1479        0,
1480        0,
1481        0,
1482        0,
1483 /*54*/ 0,
1484        0,
1485        MatSetUnfactored_MPISBAIJ,
1486        0,
1487        MatSetValuesBlocked_MPISBAIJ,
1488 /*59*/ 0,
1489        0,
1490        0,
1491        0,
1492        0,
1493 /*64*/ 0,
1494        0,
1495        0,
1496        0,
1497        0,
1498 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1499        0,
1500        0,
1501        0,
1502        0,
1503 /*74*/ 0,
1504        0,
1505        0,
1506        0,
1507        0,
1508 /*79*/ 0,
1509        0,
1510        0,
1511        0,
1512        MatLoad_MPISBAIJ,
1513 /*84*/ 0,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*89*/ 0,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*94*/ 0,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*99*/ 0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*104*/0,
1534        MatRealPart_MPISBAIJ,
1535        MatImaginaryPart_MPISBAIJ,
1536        MatGetRowUpperTriangular_MPISBAIJ,
1537        MatRestoreRowUpperTriangular_MPISBAIJ,
1538 /*109*/0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*114*/0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*119*/0,
1549        0,
1550        0,
1551        0
1552 };
1553 
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1558 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1559 {
1560   PetscFunctionBegin;
1561   *a = ((Mat_MPISBAIJ *)A->data)->A;
1562   PetscFunctionReturn(0);
1563 }
1564 EXTERN_C_END
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1569 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1570 {
1571   Mat_MPISBAIJ   *b;
1572   PetscErrorCode ierr;
1573   PetscInt       i,mbs,Mbs;
1574 
1575   PetscFunctionBegin;
1576   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1577   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1578   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1579   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1580 
1581   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1582   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1583   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1584   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1585 
1586   if (d_nnz) {
1587     for (i=0; i<B->rmap->n/bs; i++) {
1588       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1589     }
1590   }
1591   if (o_nnz) {
1592     for (i=0; i<B->rmap->n/bs; i++) {
1593       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1594     }
1595   }
1596 
1597   b   = (Mat_MPISBAIJ*)B->data;
1598   mbs = B->rmap->n/bs;
1599   Mbs = B->rmap->N/bs;
1600   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1601 
1602   B->rmap->bs  = bs;
1603   b->bs2 = bs*bs;
1604   b->mbs = mbs;
1605   b->nbs = mbs;
1606   b->Mbs = Mbs;
1607   b->Nbs = Mbs;
1608 
1609   for (i=0; i<=b->size; i++) {
1610     b->rangebs[i] = B->rmap->range[i]/bs;
1611   }
1612   b->rstartbs = B->rmap->rstart/bs;
1613   b->rendbs   = B->rmap->rend/bs;
1614 
1615   b->cstartbs = B->cmap->rstart/bs;
1616   b->cendbs   = B->cmap->rend/bs;
1617 
1618   if (!B->preallocated) {
1619     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1620     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1621     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1622     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1623     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1624     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1625     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1626     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1627     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1628   }
1629 
1630   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1631   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1632   B->preallocated = PETSC_TRUE;
1633   PetscFunctionReturn(0);
1634 }
1635 EXTERN_C_END
1636 
1637 EXTERN_C_BEGIN
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1640 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1641 {
1642   PetscInt       m,rstart,cstart,cend;
1643   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1644   const PetscInt *JJ=0;
1645   PetscScalar    *values=0;
1646   PetscErrorCode ierr;
1647 
1648   PetscFunctionBegin;
1649 
1650   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1651   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1652   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1653   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1654   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1655   m      = B->rmap->n/bs;
1656   rstart = B->rmap->rstart/bs;
1657   cstart = B->cmap->rstart/bs;
1658   cend   = B->cmap->rend/bs;
1659 
1660   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1661   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) {
1663     nz = ii[i+1] - ii[i];
1664     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1665     nz_max = PetscMax(nz_max,nz);
1666     JJ  = jj + ii[i];
1667     for (j=0; j<nz; j++) {
1668       if (*JJ >= cstart) break;
1669       JJ++;
1670     }
1671     d = 0;
1672     for (; j<nz; j++) {
1673       if (*JJ++ >= cend) break;
1674       d++;
1675     }
1676     d_nnz[i] = d;
1677     o_nnz[i] = nz - d;
1678   }
1679   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1680   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1681 
1682   values = (PetscScalar*)V;
1683   if (!values) {
1684     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1685     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1686   }
1687   for (i=0; i<m; i++) {
1688     PetscInt          row    = i + rstart;
1689     PetscInt          ncols  = ii[i+1] - ii[i];
1690     const PetscInt    *icols = jj + ii[i];
1691     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1692     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1693   }
1694 
1695   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1696   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 EXTERN_C_END
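
/*
   A small illustration of the block-CSR input expected above (hypothetical data,
   chosen only for this sketch): with bs=2, a process owning two local block rows
   whose block column indices are {0,2} and {1} would pass

       PetscInt ii[] = {0,2,3};    block row i has block columns jj[ii[i]] .. jj[ii[i+1]-1]
       PetscInt jj[] = {0,2,1};

   and, if values are supplied, V holds bs*bs = 4 scalars for every block listed
   in jj, stored block after block in the same order as jj.
*/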
1702 
1703 EXTERN_C_BEGIN
1704 #if defined(PETSC_HAVE_MUMPS)
1705 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1706 #endif
1707 #if defined(PETSC_HAVE_SPOOLES)
1708 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1709 #endif
1710 #if defined(PETSC_HAVE_PASTIX)
1711 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1712 #endif
1713 EXTERN_C_END
1714 
1715 /*MC
1716    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1717    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1718    the matrix is stored.
1719 
1720   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1721   can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE).
1722 
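  For example (a minimal sketch; as MatSetOption_MPISBAIJ above shows, the
  MAT_HERMITIAN option can only be set once the matrix has been assembled):
.vb
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
   MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
.ve
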
1723    Options Database Keys:
1724 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1725 
1726   Level: beginner
1727 
1728 .seealso: MatCreateMPISBAIJ
1729 M*/
1730 
1731 EXTERN_C_BEGIN
1732 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1733 EXTERN_C_END
1734 
1735 EXTERN_C_BEGIN
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatCreate_MPISBAIJ"
1738 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1739 {
1740   Mat_MPISBAIJ   *b;
1741   PetscErrorCode ierr;
1742   PetscBool      flg;
1743 
1744   PetscFunctionBegin;
1745 
1746   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1747   B->data = (void*)b;
1748   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1749 
1750   B->ops->destroy    = MatDestroy_MPISBAIJ;
1751   B->ops->view       = MatView_MPISBAIJ;
1752   B->assembled       = PETSC_FALSE;
1753 
1754   B->insertmode = NOT_SET_VALUES;
1755   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1756   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1757 
1758   /* build local table of row and column ownerships */
1759   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1760 
1761   /* build cache for off array entries formed */
1762   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1763   b->donotstash  = PETSC_FALSE;
1764   b->colmap      = PETSC_NULL;
1765   b->garray      = PETSC_NULL;
1766   b->roworiented = PETSC_TRUE;
1767 
1768   /* stuff used in block assembly */
1769   b->barray       = 0;
1770 
1771   /* stuff used for matrix vector multiply */
1772   b->lvec         = 0;
1773   b->Mvctx        = 0;
1774   b->slvec0       = 0;
1775   b->slvec0b      = 0;
1776   b->slvec1       = 0;
1777   b->slvec1a      = 0;
1778   b->slvec1b      = 0;
1779   b->sMvctx       = 0;
1780 
1781   /* stuff for MatGetRow() */
1782   b->rowindices   = 0;
1783   b->rowvalues    = 0;
1784   b->getrowactive = PETSC_FALSE;
1785 
1786   /* hash table stuff */
1787   b->ht           = 0;
1788   b->hd           = 0;
1789   b->ht_size      = 0;
1790   b->ht_flag      = PETSC_FALSE;
1791   b->ht_fact      = 0;
1792   b->ht_total_ct  = 0;
1793   b->ht_insert_ct = 0;
1794 
1795   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1796   b->ijonly       = PETSC_FALSE;
1797 
1798   b->in_loc       = 0;
1799   b->v_loc        = 0;
1800   b->n_loc        = 0;
1801   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1802     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1803     if (flg) {
1804       PetscReal fact = 1.39;
1805       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1806       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1807       if (fact <= 1.0) fact = 1.39;
1808       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1809       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1810     }
1811   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1812 
1813 #if defined(PETSC_HAVE_PASTIX)
1814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1815 					   "MatGetFactor_mpisbaij_pastix",
1816 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1817 #endif
1818 #if defined(PETSC_HAVE_MUMPS)
1819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1820                                      "MatGetFactor_sbaij_mumps",
1821                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1822 #endif
1823 #if defined(PETSC_HAVE_SPOOLES)
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1825                                      "MatGetFactor_mpisbaij_spooles",
1826                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1827 #endif
1828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1829                                      "MatStoreValues_MPISBAIJ",
1830                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1832                                      "MatRetrieveValues_MPISBAIJ",
1833                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1835                                      "MatGetDiagonalBlock_MPISBAIJ",
1836                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1838                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1839                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1841                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1842                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1844                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1845                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1846 
1847   B->symmetric                  = PETSC_TRUE;
1848   B->structurally_symmetric     = PETSC_TRUE;
1849   B->symmetric_set              = PETSC_TRUE;
1850   B->structurally_symmetric_set = PETSC_TRUE;
1851   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 EXTERN_C_END
1855 
1856 /*MC
1857    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1858 
1859    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1860    and MATMPISBAIJ otherwise.
1861 
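   For example (a minimal sketch; the type resolves to MATSEQSBAIJ or MATMPISBAIJ
   depending on the size of the communicator):
.vb
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(A,MATSBAIJ);
.ve
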
1862    Options Database Keys:
1863 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1864 
1865   Level: beginner
1866 
1867 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1868 M*/
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1872 /*@C
1873    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1874    the user should preallocate the matrix storage by setting the parameters
1875    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1876    performance can be increased by more than a factor of 50.
1877 
1878    Collective on Mat
1879 
1880    Input Parameters:
1881 +  A - the matrix
1882 .  bs   - size of block
1883 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1884            submatrix  (same for all local rows)
1885 .  d_nnz - array containing the number of block nonzeros in the various block rows
1886            in the upper triangular and diagonal part of the diagonal portion of the local
1887            submatrix (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1888            for the diagonal entry and set a value even if it is zero.
1889 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1890            submatrix (same for all local rows).
1891 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1892            off-diagonal portion of the local submatrix that is right of the diagonal
1893            (possibly different for each block row) or PETSC_NULL.
1894 
1895 
1896    Options Database Keys:
1897 +   -mat_no_unroll - uses code that does not unroll the loops in the
1898                      block calculations (much slower)
1899 -   -mat_block_size - size of the blocks to use
1900 
1901    Notes:
1902 
1903    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1904    then it must be used on all processors that share the object for that argument.
1905 
1906    If the *_nnz parameter is given then the *_nz parameter is ignored.
1907 
1908    Storage Information:
1909    For a square global matrix we define each processor's diagonal portion
1910    to be its local rows and the corresponding columns (a square submatrix);
1911    each processor's off-diagonal portion encompasses the remainder of the
1912    local matrix (a rectangular submatrix).
1913 
1914    The user can specify preallocated storage for the diagonal part of
1915    the local submatrix with either d_nz or d_nnz (not both).  Set
1916    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1917    memory allocation.  Likewise, specify preallocated storage for the
1918    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1919 
1920    You can call MatGetInfo() to get information on how effective the preallocation was,
1921    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1922    You can also run with the option -info and look for messages with the string
1923    malloc in them to see if additional memory allocation was needed.
1924 
1925    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1926    the figure below we depict these three local rows and all columns (0-11).
1927 
1928 .vb
1929            0 1 2 3 4 5 6 7 8 9 10 11
1930           -------------------
1931    row 3  |  . . . d d d o o o o o o
1932    row 4  |  . . . d d d o o o o o o
1933    row 5  |  . . . d d d o o o o o o
1934           -------------------
1935 .ve
1936 
1937    Thus, any entries in the d locations are stored in the d (diagonal)
1938    submatrix, and any entries in the o locations are stored in the
1939    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1940    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1941 
1942    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1943    plus the diagonal part of the d matrix,
1944    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1945 
1946    In general, for PDE problems in which most nonzeros are near the diagonal,
1947    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1948    or you will get TERRIBLE performance; see the users' manual chapter on
1949    matrices.
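
   For example (a minimal sketch, assuming a square matrix with block size 2 whose
   block structure is roughly tridiagonal, so each block row needs at most 2 blocks
   in the upper triangular plus diagonal part of d and 1 block in o):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,2,PETSC_NULL,1,PETSC_NULL);
.ve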
1950 
1951    Level: intermediate
1952 
1953 .keywords: matrix, block, aij, compressed row, sparse, parallel
1954 
1955 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
1956 @*/
1957 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1958 {
1959   PetscErrorCode ierr;
1960 
1961   PetscFunctionBegin;
1962   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1963   PetscValidType(B,1);
1964   PetscValidLogicalCollectiveInt(B,bs,2);
1965   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatCreateSBAIJ"
1971 /*@C
1972    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1973    (block compressed row).  For good matrix assembly performance
1974    the user should preallocate the matrix storage by setting the parameters
1975    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1976    performance can be increased by more than a factor of 50.
1977 
1978    Collective on MPI_Comm
1979 
1980    Input Parameters:
1981 +  comm - MPI communicator
1982 .  bs   - size of block
1983 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1984            This value should be the same as the local size used in creating the
1985            y vector for the matrix-vector product y = Ax.
1986 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1987            This value should be the same as the local size used in creating the
1988            x vector for the matrix-vector product y = Ax.
1989 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1990 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1991 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1992            submatrix  (same for all local rows)
1993 .  d_nnz - array containing the number of block nonzeros in the various block rows
1994            in the upper triangular portion of the diagonal portion of the local
1995            submatrix (possibly different for each block row) or PETSC_NULL.
1996            If you plan to factor the matrix you must leave room for the diagonal entry and
1997            set its value even if it is zero.
1998 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1999            submatrix (same for all local rows).
2000 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2001            off-diagonal portion of the local submatrix (possibly different for
2002            each block row) or PETSC_NULL.
2003 
2004    Output Parameter:
2005 .  A - the matrix
2006 
2007    Options Database Keys:
2008 +   -mat_no_unroll - uses code that does not unroll the loops in the
2009                      block calculations (much slower)
2010 .   -mat_block_size - size of the blocks to use
2011 -   -mat_mpi - use the parallel matrix data structures even on one processor
2012                (defaults to using SeqBAIJ format on one processor)
2013 
2014    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2015    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2016    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2017 
2018    Notes:
2019    The number of rows and columns must be divisible by blocksize.
2020    This matrix type does not support complex Hermitian operations.
2021 
2022    The user MUST specify either the local or global matrix dimensions
2023    (possibly both).
2024 
2025    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2026    then it must be used on all processors that share the object for that argument.
2027 
2028    If the *_nnz parameter is given then the *_nz parameter is ignored.
2029 
2030    Storage Information:
2031    For a square global matrix we define each processor's diagonal portion
2032    to be its local rows and the corresponding columns (a square submatrix);
2033    each processor's off-diagonal portion encompasses the remainder of the
2034    local matrix (a rectangular submatrix).
2035 
2036    The user can specify preallocated storage for the diagonal part of
2037    the local submatrix with either d_nz or d_nnz (not both).  Set
2038    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2039    memory allocation.  Likewise, specify preallocated storage for the
2040    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2041 
2042    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2043    the figure below we depict these three local rows and all columns (0-11).
2044 
2045 .vb
2046            0 1 2 3 4 5 6 7 8 9 10 11
2047           -------------------
2048    row 3  |  . . . d d d o o o o o o
2049    row 4  |  . . . d d d o o o o o o
2050    row 5  |  . . . d d d o o o o o o
2051           -------------------
2052 .ve
2053 
2054    Thus, any entries in the d locations are stored in the d (diagonal)
2055    submatrix, and any entries in the o locations are stored in the
2056    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2057    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2058 
2059    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2060    plus the diagonal part of the d matrix,
2061    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2062    In general, for PDE problems in which most nonzeros are near the diagonal,
2063    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2064    or you will get TERRIBLE performance; see the users' manual chapter on
2065    matrices.
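
   For example (a minimal sketch with illustrative counts, analogous to the block
   tridiagonal case above: at most 2 blocks per block row in the upper triangular
   plus diagonal part of d and 1 block in o):
.vb
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,2,PETSC_NULL,1,PETSC_NULL,&A);
.ve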
2066 
2067    Level: intermediate
2068 
2069 .keywords: matrix, block, aij, compressed row, sparse, parallel
2070 
2071 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2072 @*/
2073 
2074 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2075 {
2076   PetscErrorCode ierr;
2077   PetscMPIInt    size;
2078 
2079   PetscFunctionBegin;
2080   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2081   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2082   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2083   if (size > 1) {
2084     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2085     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2086   } else {
2087     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2088     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 
2094 #undef __FUNCT__
2095 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2096 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2097 {
2098   Mat            mat;
2099   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2100   PetscErrorCode ierr;
2101   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2102   PetscScalar    *array;
2103 
2104   PetscFunctionBegin;
2105   *newmat       = 0;
2106   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2107   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2108   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2109   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2110   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2111   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2112 
2113   mat->factortype   = matin->factortype;
2114   mat->preallocated = PETSC_TRUE;
2115   mat->assembled    = PETSC_TRUE;
2116   mat->insertmode   = NOT_SET_VALUES;
2117 
2118   a = (Mat_MPISBAIJ*)mat->data;
2119   a->bs2   = oldmat->bs2;
2120   a->mbs   = oldmat->mbs;
2121   a->nbs   = oldmat->nbs;
2122   a->Mbs   = oldmat->Mbs;
2123   a->Nbs   = oldmat->Nbs;
2124 
2125 
2126   a->size         = oldmat->size;
2127   a->rank         = oldmat->rank;
2128   a->donotstash   = oldmat->donotstash;
2129   a->roworiented  = oldmat->roworiented;
2130   a->rowindices   = 0;
2131   a->rowvalues    = 0;
2132   a->getrowactive = PETSC_FALSE;
2133   a->barray       = 0;
2134   a->rstartbs    = oldmat->rstartbs;
2135   a->rendbs      = oldmat->rendbs;
2136   a->cstartbs    = oldmat->cstartbs;
2137   a->cendbs      = oldmat->cendbs;
2138 
2139   /* hash table stuff */
2140   a->ht           = 0;
2141   a->hd           = 0;
2142   a->ht_size      = 0;
2143   a->ht_flag      = oldmat->ht_flag;
2144   a->ht_fact      = oldmat->ht_fact;
2145   a->ht_total_ct  = 0;
2146   a->ht_insert_ct = 0;
2147 
2148   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2149   if (oldmat->colmap) {
2150 #if defined (PETSC_USE_CTABLE)
2151     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2152 #else
2153     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2154     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2155     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2156 #endif
2157   } else a->colmap = 0;
2158 
2159   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2160     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2161     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2162     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2163   } else a->garray = 0;
2164 
2165   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2166   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2167   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2168   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2169   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2170 
2171   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2172   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2173   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2175 
2176   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2177   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2178   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2179   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2180   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2181   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2182   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2183   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2184   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2185   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2186   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2189 
2190   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2191   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2192   a->sMvctx = oldmat->sMvctx;
2193   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2194 
2195   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2197   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2198   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2199   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2200   *newmat = mat;
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatLoad_MPISBAIJ"
2206 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2207 {
2208   PetscErrorCode ierr;
2209   PetscInt       i,nz,j,rstart,rend;
2210   PetscScalar    *vals,*buf;
2211   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2212   MPI_Status     status;
2213   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2214   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2215   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2216   PetscInt       bs=1,Mbs,mbs,extra_rows;
2217   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2218   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2219   int            fd;
2220 
2221   PetscFunctionBegin;
2222   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2223     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2224   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2225 
2226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2227   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2228   if (!rank) {
2229     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2230     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2231     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2232     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2233   }
2234 
2235   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2236 
2237   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2238   M = header[1]; N = header[2];
2239 
2240   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2241   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2242   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2243 
2244   /* If global sizes are set, check if they are consistent with that given in the file */
2245   if (sizesset) {
2246     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2247   }
2248   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2249   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2250 
2251   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2252 
2253   /*
2254      This code adds extra rows to make sure the number of rows is
2255      divisible by the blocksize
2256   */
2257   Mbs        = M/bs;
2258   extra_rows = bs - M + bs*(Mbs);
2259   if (extra_rows == bs) extra_rows = 0;
2260   else                  Mbs++;
2261   if (extra_rows && !rank) {
2262     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2263   }
2264 
2265   /* determine ownership of all rows */
2266   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2267     mbs        = Mbs/size + ((Mbs % size) > rank);
2268     m          = mbs*bs;
2269   } else { /* User Set */
2270     m          = newmat->rmap->n;
2271     mbs        = m/bs;
2272   }
2273   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2274   mmbs       = PetscMPIIntCast(mbs);
2275   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2276   rowners[0] = 0;
2277   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2278   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2279   rstart = rowners[rank];
2280   rend   = rowners[rank+1];
2281 
2282   /* distribute row lengths to all processors */
2283   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2284   if (!rank) {
2285     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2286     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2287     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2288     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2289     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2290     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2291     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2292   } else {
2293     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2294   }
2295 
2296   if (!rank) {   /* procs[0] */
2297     /* calculate the number of nonzeros on each processor */
2298     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2299     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2300     for (i=0; i<size; i++) {
2301       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2302         procsnz[i] += rowlengths[j];
2303       }
2304     }
2305     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2306 
2307     /* determine max buffer needed and allocate it */
2308     maxnz = 0;
2309     for (i=0; i<size; i++) {
2310       maxnz = PetscMax(maxnz,procsnz[i]);
2311     }
2312     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2313 
2314     /* read in my part of the matrix column indices  */
2315     nz     = procsnz[0];
2316     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2317     mycols = ibuf;
2318     if (size == 1)  nz -= extra_rows;
2319     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2320     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2321 
2322     /* read in each of the other processors' parts (except the last) and ship them off */
2323     for (i=1; i<size-1; i++) {
2324       nz   = procsnz[i];
2325       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2326       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2327     }
2328     /* read in the stuff for the last proc */
2329     if (size != 1) {
2330       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2331       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2332       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2333       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2334     }
2335     ierr = PetscFree(cols);CHKERRQ(ierr);
2336   } else {  /* procs[i], i>0 */
2337     /* determine buffer space needed for message */
2338     nz = 0;
2339     for (i=0; i<m; i++) {
2340       nz += locrowlens[i];
2341     }
2342     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2343     mycols = ibuf;
2344     /* receive message of column indices*/
2345     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2346     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2347     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2348   }
2349 
2350   /* loop over local rows, determining number of off diagonal entries */
2351   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2352   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2353   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2354   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2355   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2356   rowcount = 0;
2357   nzcount  = 0;
2358   for (i=0; i<mbs; i++) {
2359     dcount  = 0;
2360     odcount = 0;
2361     for (j=0; j<bs; j++) {
2362       kmax = locrowlens[rowcount];
2363       for (k=0; k<kmax; k++) {
2364         tmp = mycols[nzcount++]/bs; /* block col. index */
2365         if (!mask[tmp]) {
2366           mask[tmp] = 1;
2367           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2368           else masked1[dcount++] = tmp; /* entry in diag portion */
2369         }
2370       }
2371       rowcount++;
2372     }
2373 
2374     dlens[i]  = dcount;  /* d_nzz[i] */
2375     odlens[i] = odcount; /* o_nzz[i] */
2376 
2377     /* zero out the mask elements we set */
2378     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2379     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2380   }
2381   if (!sizesset) {
2382     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2383   }
2384   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2385   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2386 
2387   if (!rank) {
2388     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2389     /* read in my part of the matrix numerical values  */
2390     nz = procsnz[0];
2391     vals = buf;
2392     mycols = ibuf;
2393     if (size == 1)  nz -= extra_rows;
2394     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2395     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2396 
2397     /* insert into matrix */
2398     jj      = rstart*bs;
2399     for (i=0; i<m; i++) {
2400       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2401       mycols += locrowlens[i];
2402       vals   += locrowlens[i];
2403       jj++;
2404     }
2405 
2406     /* read in other processors (except the last one) and ship out */
2407     for (i=1; i<size-1; i++) {
2408       nz   = procsnz[i];
2409       vals = buf;
2410       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2411       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2412     }
2413     /* the last proc */
2414     if (size != 1) {
2415       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2416       vals = buf;
2417       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2418       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2419       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2420     }
2421     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2422 
2423   } else {
2424     /* receive numeric values */
2425     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2426 
2427     /* receive message of values*/
2428     vals   = buf;
2429     mycols = ibuf;
2430     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2431     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2432     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2433 
2434     /* insert into matrix */
2435     jj      = rstart*bs;
2436     for (i=0; i<m; i++) {
2437       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2438       mycols += locrowlens[i];
2439       vals   += locrowlens[i];
2440       jj++;
2441     }
2442   }
2443 
2444   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2445   ierr = PetscFree(buf);CHKERRQ(ierr);
2446   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2447   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2448   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2449   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2450   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2457 /*XXXXX@
2458    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2459 
2460    Input Parameters:
2461 .  mat  - the matrix
2462 .  fact - factor
2463 
2464    Not Collective on Mat, each process can have a different hash factor
2465 
2466    Level: advanced
2467 
2468   Notes:
2469    This can also be set by the command line option: -mat_use_hash_table fact
2470 
2471 .keywords: matrix, hashtable, factor, HT
2472 
2473 .seealso: MatSetOption()
2474 @XXXXX*/
2475 
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2479 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2480 {
2481   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2482   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2483   PetscReal      atmp;
2484   PetscReal      *work,*svalues,*rvalues;
2485   PetscErrorCode ierr;
2486   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2487   PetscMPIInt    rank,size;
2488   PetscInt       *rowners_bs,dest,count,source;
2489   PetscScalar    *va;
2490   MatScalar      *ba;
2491   MPI_Status     stat;
2492 
2493   PetscFunctionBegin;
2494   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2495   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2496   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2497 
2498   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2499   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2500 
2501   bs   = A->rmap->bs;
2502   mbs  = a->mbs;
2503   Mbs  = a->Mbs;
2504   ba   = b->a;
2505   bi   = b->i;
2506   bj   = b->j;
2507 
2508   /* find ownerships */
2509   rowners_bs = A->rmap->range;
2510 
2511   /* each proc creates an array to be distributed */
2512   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2513   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2514 
2515   /* row_max for B */
2516   if (rank != size-1){
2517     for (i=0; i<mbs; i++) {
2518       ncols = bi[1] - bi[0]; bi++;
2519       brow  = bs*i;
2520       for (j=0; j<ncols; j++){
2521         bcol = bs*(*bj);
2522         for (kcol=0; kcol<bs; kcol++){
2523           col = bcol + kcol;                 /* local col index */
2524           col += rowners_bs[rank+1];      /* global col index */
2525           for (krow=0; krow<bs; krow++){
2526             atmp = PetscAbsScalar(*ba); ba++;
2527             row = brow + krow;    /* local row index */
2528             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2529             if (work[col] < atmp) work[col] = atmp;
2530           }
2531         }
2532         bj++;
2533       }
2534     }
2535 
2536     /* send values to its owners */
2537     for (dest=rank+1; dest<size; dest++){
2538       svalues = work + rowners_bs[dest];
2539       count   = rowners_bs[dest+1]-rowners_bs[dest];
2540       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2541     }
2542   }
2543 
2544   /* receive values */
2545   if (rank){
2546     rvalues = work;
2547     count   = rowners_bs[rank+1]-rowners_bs[rank];
2548     for (source=0; source<rank; source++){
2549       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2550       /* process values */
2551       for (i=0; i<count; i++){
2552         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2553       }
2554     }
2555   }
2556 
2557   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2558   ierr = PetscFree(work);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatSOR_MPISBAIJ"
2564 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2565 {
2566   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2567   PetscErrorCode    ierr;
2568   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2569   PetscScalar       *x,*ptr,*from;
2570   Vec               bb1;
2571   const PetscScalar *b;
2572 
2573   PetscFunctionBegin;
2574   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2575   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2576 
2577   if (flag == SOR_APPLY_UPPER) {
2578     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2579     PetscFunctionReturn(0);
2580   }
2581 
2582   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2583     if (flag & SOR_ZERO_INITIAL_GUESS) {
2584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2585       its--;
2586     }
2587 
2588     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2589     while (its--){
2590 
2591       /* lower triangular part: slvec0b = - B^T*xx */
2592       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2593 
2594       /* copy xx into slvec0a */
2595       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2596       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2597       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2598       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2599 
2600       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2601 
2602       /* copy bb into slvec1a */
2603       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2604       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2605       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2606       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2607 
2608       /* set slvec1b = 0 */
2609       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2610 
2611       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2612       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2613       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2614       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2615 
2616       /* upper triangular part: bb1 = bb1 - B*x */
2617       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2618 
2619       /* local diagonal sweep */
2620       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2621     }
2622     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2623   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2624     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2625   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2626     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2627   } else if (flag & SOR_EISENSTAT) {
2628     Vec               xx1;
2629     PetscBool         hasop;
2630     const PetscScalar *diag;
2631     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2632     PetscInt          i,n;
2633 
2634     if (!mat->xx1) {
2635       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2636       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2637     }
2638     xx1 = mat->xx1;
2639     bb1 = mat->bb1;
2640 
2641     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2642 
2643     if (!mat->diag) {
2644       /* this is wrong for same matrix with new nonzero values */
2645       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2646       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2647     }
2648     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2649 
2650     if (hasop) {
2651       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2652       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2653     } else {
      /*
          The following two calls are replaced by the explicit loop below,
          which may be a bit faster with a good compiler:
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
2659       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2660       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2661       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2662       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2663       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
2675       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2676       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2677       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2678       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2679     }
2680 
2681     /* multiply off-diagonal portion of matrix */
2682     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2683     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2684     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2685     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2687     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2688     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2689     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2692 
2693     /* local sweep */
2694     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2695     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2696   } else {
2697     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2698   }
2699   PetscFunctionReturn(0);
2700 }
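
/*
   A minimal, hypothetical usage sketch: MatSOR() is normally reached through
   the SOR preconditioner rather than called directly.  Assuming an assembled
   MATMPISBAIJ matrix has been given to a KSP object ksp:

       PC pc;
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
       ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   PCApply() then invokes this routine with flag = SOR_LOCAL_SYMMETRIC_SWEEP.
*/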
2701 
2702 #undef __FUNCT__
2703 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2704 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2705 {
2706   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2707   PetscErrorCode ierr;
2708   Vec            lvec1,bb1;
2709 
2710   PetscFunctionBegin;
2711   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2712   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2713 
2714   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
2716       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2717       its--;
2718     }
2719 
2720     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2721     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
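    /*
       Loop sketch: this variant uses two scatters on Mvctx per sweep, a
       forward scatter to gather the ghost values of xx for the B*x product
       and a reverse scatter to add the local -B^T*xx contributions into bb1,
       instead of the combined slvec0/slvec1 scatter used above.
    */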
2722     while (its--){
2723       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2724 
      /* lower triangular part: bb1 = bb - B^T*xx */
2726       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2727       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2728 
2729       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2730       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2731       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2732 
      /* upper triangular part: bb1 = bb1 - B*xx */
2734       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2735       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2736 
2737       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2738 
2739       /* diagonal sweep */
2740       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2741     }
2742     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2743     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2744   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 #undef __FUNCT__
2749 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
         rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size; only a block size of 1 is currently supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.   i - row indices
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based; i[row] is the offset into j (and a) at which local row
     number row begins, so i has m+1 entries with i[0] = 0.

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatMPISBAIJSetPreallocationCSR(),
          MATMPISBAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
2786 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2787 {
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2796   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2797   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2798   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2799   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2800   PetscFunctionReturn(0);
2801 }
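
/*
   A small usage sketch (illustrative values only): a 2x2 symmetric matrix
   [4 1; 1 3] on one process with bs = 1.  Only the upper triangle of each
   row is passed, so row 0 carries two entries and row 1 carries one:

       Mat         A;
       PetscInt    ii[] = {0,2,3};
       PetscInt    jj[] = {0,1,1};
       PetscScalar aa[] = {4.0,1.0,3.0};
       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
*/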
2802 
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateAIJ(), MATMPISBAIJ
@*/
2825 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2826 {
2827   PetscErrorCode ierr;
2828 
2829   PetscFunctionBegin;
2830   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
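
/*
   A minimal usage sketch (assumptions: bs = 1, m local rows, and CSR arrays
   i, j, v describing only the upper triangular part of each local row,
   sorted by column within each row):

       Mat B;
       ierr = MatCreate(comm,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
       ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
*/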
2833 
2834 
2835