xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision de196d0083ff058e71880dfa38e0c07d4e33cbfb)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
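/*
   The two macros below insert one scalar into the diagonal (A) or
   off-diagonal (B) part: a short bisection narrows the search range in the
   row's sorted list of block columns, a linear scan finds the block, and the
   value is updated in place if the block exists; otherwise (unless nonew
   disallows new nonzeros) the row is reallocated and the later blocks are
   shifted up by one. Each bs x bs block is stored column-major, hence the
   ap[bs2*_i + bs*cidx + ridx] addressing.
*/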
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by the user is ignored.
117 */
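/*
   Usage sketch (illustrative only; exampleAssembleTridiag and its values are
   hypothetical and not part of this file): the caller supplies only entries
   with i <= j, the i > j entries being implied by symmetry.
*/
#if 0
static PetscErrorCode exampleAssembleTridiag(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,N;
  PetscScalar    diag = 2.0,offdiag = -1.0;

  PetscFunctionBegin;
  ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(mat,1,&i,1,&i,&diag,INSERT_VALUES);CHKERRQ(ierr); /* diagonal entry */
    if (i+1 < N) {
      j    = i+1; /* upper neighbour only; (i+1,i) is implied by symmetry */
      ierr = MatSetValues(mat,1,&i,1,&j,&offdiag,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif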
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Variables required by the MatSetValues_SeqSBAIJ_{A,B}_Private() macros */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
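            /* colmap stores (block column index + 1); a result of 0 (hence col < 0
               after the decrement below) means the global block column is not in B yet */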
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block row/column (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
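  /* stepval = number of scalars to skip, after copying bs consecutive entries
     of a block, to reach that block's next row (row oriented, stride n*bs) or
     next column (column oriented, stride m*bs) within the caller's array v */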
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If only one block column (row oriented) or one block row (column oriented) is passed, no copy is required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
607       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
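      /* each entry of B is stored once but appears twice in the symmetric
         matrix (as b_ij and b_ji), hence the factor of 2 below */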
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636       }
637       /* Bmat */
638       v = bmat->a; jj = bmat->j;
639       for (brow=0; brow<mbs; brow++) {
640         grow = bs*(rstart + brow);
641         nz = bmat->i[brow+1] - bmat->i[brow];
642         for (bcol=0; bcol<nz; bcol++) {
643           gcol = bs*garray[*jj]; jj++;
644           for (col=0; col<bs; col++) {
645             for (row=0; row<bs; row++) {
646               vabs            = PetscAbsScalar(*v); v++;
647               rsum[gcol+col] += vabs;
648               rsum[grow+row] += vabs;
649             }
650           }
651         }
652       }
653       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
654       *norm = 0.0;
655       for (col=0; col<mat->cmap->N; col++) {
656         if (rsum2[col] > *norm) *norm = rsum2[col];
657       }
658       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
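/*
   Off-process entries accumulated by MatSetValues() and MatSetValuesBlocked()
   are held in two stashes: mat->stash carries scalar entries, mat->bstash
   carries bs x bs blocks. This routine initiates their communication; the
   receives are completed and applied in MatAssemblyEnd_MPISBAIJ().
*/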
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671   InsertMode     addv;
672 
673   PetscFunctionBegin;
674   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
677   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
678   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
679   mat->insertmode = addv; /* in case this processor had no cache */
680 
681   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
682   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
683   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
684   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
685   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
686   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
692 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
693 {
694   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
695   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
696   PetscErrorCode ierr;
697   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
698   PetscInt       *row,*col;
699   PetscBool      other_disassembled;
700   PetscMPIInt    n;
701   PetscBool      r1,r2,r3;
702   MatScalar      *val;
703   InsertMode     addv = mat->insertmode;
704 
705   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
706   PetscFunctionBegin;
707   if (!baij->donotstash &&  !mat->nooffprocentries) {
708     while (1) {
709       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
710       if (!flg) break;
711 
712       for (i=0; i<n;) {
713         /* Now identify the consecutive vals belonging to the same row */
714         for (j=i,rstart=row[j]; j<n; j++) {
715           if (row[j] != rstart) break;
716         }
717         if (j < n) ncols = j-i;
718         else       ncols = n-i;
719         /* Now assemble all these values with a single function call */
720         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
721         i    = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
725     /* Now process the block-stash. Since the values are stashed column-oriented,
726        temporarily set the roworiented flags to PETSC_FALSE, and restore the
727        original flags after MatSetValuesBlocked() */
728     r1 = baij->roworiented;
729     r2 = a->roworiented;
730     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
731 
732     baij->roworiented = PETSC_FALSE;
733     a->roworiented    = PETSC_FALSE;
734 
735     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
736     while (1) {
737       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
738       if (!flg) break;
739 
740       for (i=0; i<n;) {
741         /* Now identify the consecutive vals belonging to the same row */
742         for (j=i,rstart=row[j]; j<n; j++) {
743           if (row[j] != rstart) break;
744         }
745         if (j < n) ncols = j-i;
746         else       ncols = n-i;
747         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
748         i    = j;
749       }
750     }
751     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
752 
753     baij->roworiented = r1;
754     a->roworiented    = r2;
755 
756     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
757   }
758 
759   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
760   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
761 
762   /* determine if any processor has disassembled, if so we must
763      also disassemble ourselves, in order that we may reassemble. */
764   /*
765      if nonzero structure of submatrix B cannot change then we know that
766      no processor disassembled thus we can skip this stuff
767   */
768   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
769     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
770     if (mat->was_assembled && !other_disassembled) {
771       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
772     }
773   }
774 
775   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
776     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
777   }
778   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
779   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
780 
781   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
782 
783   baij->rowvalues = 0;
784 
785   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
786   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
787     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
788     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
794 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
795 #include <petscdraw.h>
796 #undef __FUNCT__
797 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
798 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
799 {
800   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
801   PetscErrorCode    ierr;
802   PetscInt          bs   = mat->rmap->bs;
803   PetscMPIInt       rank = baij->rank;
804   PetscBool         iascii,isdraw;
805   PetscViewer       sviewer;
806   PetscViewerFormat format;
807 
808   PetscFunctionBegin;
809   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
810   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
811   if (iascii) {
812     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
813     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
814       MatInfo info;
815       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
816       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
818       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz allocated %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
819       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
820       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
821       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
822       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
823       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
824       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
825       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
826       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
827       PetscFunctionReturn(0);
828     } else if (format == PETSC_VIEWER_ASCII_INFO) {
829       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
830       PetscFunctionReturn(0);
831     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
832       PetscFunctionReturn(0);
833     }
834   }
835 
836   if (isdraw) {
837     PetscDraw draw;
838     PetscBool isnull;
839     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
840     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
841   }
842 
843   {
844     /* assemble the entire matrix onto the first processor */
845     Mat          A;
846     Mat_SeqSBAIJ *Aloc;
847     Mat_SeqBAIJ  *Bloc;
848     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
849     MatScalar    *a;
850     const char   *matname;
851 
852     /* Should this be the same type as mat? */
853     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
854     if (!rank) {
855       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
856     } else {
857       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
858     }
859     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
860     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
861     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
862     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
863 
864     /* copy over the A part */
865     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
866     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
867     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
868 
869     for (i=0; i<mbs; i++) {
870       rvals[0] = bs*(baij->rstartbs + i);
871       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
872       for (j=ai[i]; j<ai[i+1]; j++) {
873         col = (baij->cstartbs+aj[j])*bs;
874         for (k=0; k<bs; k++) {
875           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
876           col++;
877           a += bs;
878         }
879       }
880     }
881     /* copy over the B part */
882     Bloc = (Mat_SeqBAIJ*)baij->B->data;
883     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
884     for (i=0; i<mbs; i++) {
885 
886       rvals[0] = bs*(baij->rstartbs + i);
887       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
888       for (j=ai[i]; j<ai[i+1]; j++) {
889         col = baij->garray[aj[j]]*bs;
890         for (k=0; k<bs; k++) {
891           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
892           col++;
893           a += bs;
894         }
895       }
896     }
897     ierr = PetscFree(rvals);CHKERRQ(ierr);
898     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
899     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
900     /*
901        Everyone has to call to draw the matrix since the graphics waits are
902        synchronized across all processors that share the PetscDraw object
903     */
904     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
905     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
906     if (!rank) {
907       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
908       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
909     }
910     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
911     ierr = MatDestroy(&A);CHKERRQ(ierr);
912   }
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "MatView_MPISBAIJ"
918 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
919 {
920   PetscErrorCode ierr;
921   PetscBool      iascii,isdraw,issocket,isbinary;
922 
923   PetscFunctionBegin;
924   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
925   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
926   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
927   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
928   if (iascii || isdraw || issocket || isbinary) {
929     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatDestroy_MPISBAIJ"
936 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
937 {
938   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942 #if defined(PETSC_USE_LOG)
943   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
944 #endif
945   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
946   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
947   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
948   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
949 #if defined(PETSC_USE_CTABLE)
950   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
951 #else
952   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
953 #endif
954   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
955   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
956   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
957   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
958   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
959   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
960   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
961   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
962   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
963   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
964   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
965   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
966   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
967   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
968   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
969 #if defined(PETSC_USE_REAL_MAT_SINGLE)
970   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
971 #endif
972   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
973   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
974   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
975   ierr = PetscFree(mat->data);CHKERRQ(ierr);
976 
977   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
978   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
979   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
980   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
981   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
982   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
983 #if defined(PETSC_HAVE_ELEMENTAL)
984   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
985 #endif
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
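/*
   Sketch of the algorithm, read from the calls below: only the upper
   triangle is stored, so y = (D + U + U^H) x is assembled as
     slvec1a = D x        (diagonal block a->A)
     slvec0b = U^H x      (Hermitian transpose of the off-diagonal block a->B)
     slvec0a = x          (copied in by hand)
   A single forward ADD_VALUES scatter of slvec0 into slvec1 then both
   gathers the ghost values of x (into slvec1b) and accumulates the U^H x
   contributions onto their owning processes (into slvec1a); finally
     y = U slvec1b + slvec1a.
*/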
991 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
992 {
993   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode    ierr;
995   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
996   PetscScalar       *from;
997   const PetscScalar *x;
998 
999   PetscFunctionBegin;
1000   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1001   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1002 
1003   /* diagonal part */
1004   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1005   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1006 
1007   /* subdiagonal part */
1008   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1009 
1010   /* copy x into the vec slvec0 */
1011   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1012   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1013 
1014   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1015   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1016   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1017 
1018   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020   /* superdiagonal part */
1021   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatMult_MPISBAIJ"
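/* Identical to MatMult_MPISBAIJ_Hermitian() above, except that the
   off-diagonal contribution uses the ordinary transpose: y = (D + U + U^T) x */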
1027 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1028 {
1029   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1030   PetscErrorCode    ierr;
1031   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1032   PetscScalar       *from;
1033   const PetscScalar *x;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1037   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1038 
1039   /* diagonal part */
1040   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1041   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1042 
1043   /* subdiagonal part */
1044   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1045 
1046   /* copy x into the vec slvec0 */
1047   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1048   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1049 
1050   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1051   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1052   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1053 
1054   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   /* superdiagonal part */
1057   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1063 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1064 {
1065   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       nt;
1068 
1069   PetscFunctionBegin;
1070   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1071   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1072 
1073   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1074   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1075 
1076   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1077   /* do diagonal part */
1078   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1079   /* do superdiagonal part */
1080   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1082   /* do subdiagonal part */
1083   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1084   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1091 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1092 {
1093   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1094   PetscErrorCode    ierr;
1095   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1096   PetscScalar       *from,zero=0.0;
1097   const PetscScalar *x;
1098 
1099   PetscFunctionBegin;
1100   /*
1101   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1102   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1103   */
1104   /* diagonal part */
1105   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1106   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1107 
1108   /* subdiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1110 
1111   /* copy x into the vec slvec0 */
1112   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1113   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1114   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1115   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1116 
1117   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1119   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120 
1121   /* superdiagonal part */
1122   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1128 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1129 {
1130   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135   /* do diagonal part */
1136   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1137   /* do superdiagonal part */
1138   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1140 
1141   /* do subdiagonal part */
1142   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1143   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 /*
1149   This only works correctly for square matrices where the subblock a->A is the
1150    diagonal block
1151 */
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1154 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1155 {
1156   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   /* if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where a->A is diag block"); */
1161   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatScale_MPISBAIJ"
1167 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1168 {
1169   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1174   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1180 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1181 {
1182   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1183   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1184   PetscErrorCode ierr;
1185   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1186   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1187   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1188 
1189   PetscFunctionBegin;
1190   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1191   mat->getrowactive = PETSC_TRUE;
1192 
1193   if (!mat->rowvalues && (idx || v)) {
1194     /*
1195         allocate enough space to hold information from the longest row.
1196     */
1197     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1198     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1199     PetscInt     max = 1,mbs = mat->mbs,tmp;
1200     for (i=0; i<mbs; i++) {
1201       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1202       if (max < tmp) max = tmp;
1203     }
1204     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1205   }
1206 
1207   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1208   lrow = row - brstart;  /* local row index */
1209 
1210   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1211   if (!v)   {pvA = 0; pvB = 0;}
1212   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1213   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1214   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1215   nztot = nzA + nzB;
1216 
1217   cmap = mat->garray;
1218   if (v  || idx) {
1219     if (nztot) {
1220       /* Sort by increasing column numbers, assuming A and B already sorted */
1221       PetscInt imark = -1;
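        /* imark marks the boundary in the B (off-diagonal) columns between
           those preceding the diagonal block and those following it, so the
           merged row is emitted in increasing global column order */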
1222       if (v) {
1223         *v = v_p = mat->rowvalues;
1224         for (i=0; i<nzB; i++) {
1225           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1226           else break;
1227         }
1228         imark = i;
1229         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1230         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1231       }
1232       if (idx) {
1233         *idx = idx_p = mat->rowindices;
1234         if (imark > -1) {
1235           for (i=0; i<imark; i++) {
1236             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1237           }
1238         } else {
1239           for (i=0; i<nzB; i++) {
1240             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1241             else break;
1242           }
1243           imark = i;
1244         }
1245         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1246         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1247       }
1248     } else {
1249       if (idx) *idx = 0;
1250       if (v)   *v   = 0;
1251     }
1252   }
1253   *nz  = nztot;
1254   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1255   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1261 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1262 {
1263   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1264 
1265   PetscFunctionBegin;
1266   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1267   baij->getrowactive = PETSC_FALSE;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1273 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1274 {
1275   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1276   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1277 
1278   PetscFunctionBegin;
1279   aA->getrow_utriangular = PETSC_TRUE;
1280   PetscFunctionReturn(0);
1281 }
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1284 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1285 {
1286   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1287   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1288 
1289   PetscFunctionBegin;
1290   aA->getrow_utriangular = PETSC_FALSE;
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1296 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1297 {
1298   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1299   PetscErrorCode ierr;
1300 
1301   PetscFunctionBegin;
1302   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1303   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1309 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1310 {
1311   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1316   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1321    Input: isrow       - distributed (parallel),
1322           iscol_local - locally owned (seq)
1323 */
1324 #undef __FUNCT__
1325 #define __FUNCT__ "ISEqual_private"
1326 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1327 {
1328   PetscErrorCode ierr;
1329   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1330   const PetscInt *ptr1,*ptr2;
1331 
1332   PetscFunctionBegin;
1333   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1334   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1335   if (sz1 > sz2) {
1336     *flg = PETSC_FALSE;
1337     PetscFunctionReturn(0);
1338   }
1339 
1340   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1341   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1342 
1343   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1344   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1345   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1346   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1347   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1348   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1349 
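  /* both arrays are sorted, so each search resumes at the previous match
     position k; isrow is a subset exactly when every a1[i] is matched */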
1350   nmatch=0;
1351   k     = 0;
1352   for (i=0; i<sz1; i++){
1353     for (j=k; j<sz2; j++){
1354       if (a1[i] == a2[j]) {
1355         k = j; nmatch++;
1356         break;
1357       }
1358     }
1359   }
1360   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1361   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1362   ierr = PetscFree(a1);CHKERRQ(ierr);
1363   ierr = PetscFree(a2);CHKERRQ(ierr);
1364   if (nmatch < sz1) {
1365     *flg = PETSC_FALSE;
1366   } else {
1367     *flg = PETSC_TRUE;
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1374 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1375 {
1376   PetscErrorCode ierr;
1377   IS             iscol_local;
1378   PetscInt       csize;
1379   PetscBool      isequal;
1380 
1381   PetscFunctionBegin;
1382   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1383   if (call == MAT_REUSE_MATRIX) {
1384     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1385     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1386   } else {
1387     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1388     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1389     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1390   }
1391 
1392   /* now call MatGetSubMatrix_MPIBAIJ() */
1393   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1394   if (call == MAT_INITIAL_MATRIX) {
1395     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1396     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1397   }
1398   PetscFunctionReturn(0);
1399 }
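
/* A hedged usage sketch (illustration only): for this symmetric format the
   row and column index sets passed to MatGetSubMatrix() must select the same
   global indices, e.g.
.vb
     IS  is;
     Mat S;
     ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
.ve
   nlocal and first are illustrative local range parameters. */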
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1403 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1404 {
1405   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1406   PetscErrorCode ierr;
1407 
1408   PetscFunctionBegin;
1409   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1410   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1416 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1417 {
1418   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1419   Mat            A  = a->A,B = a->B;
1420   PetscErrorCode ierr;
1421   PetscReal      isend[5],irecv[5];
1422 
1423   PetscFunctionBegin;
1424   info->block_size = (PetscReal)matin->rmap->bs;
1425 
1426   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1427 
1428   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1429   isend[3] = info->memory;  isend[4] = info->mallocs;
1430 
1431   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1432 
1433   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1434   isend[3] += info->memory;  isend[4] += info->mallocs;
1435   if (flag == MAT_LOCAL) {
1436     info->nz_used      = isend[0];
1437     info->nz_allocated = isend[1];
1438     info->nz_unneeded  = isend[2];
1439     info->memory       = isend[3];
1440     info->mallocs      = isend[4];
1441   } else if (flag == MAT_GLOBAL_MAX) {
1442     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1443 
1444     info->nz_used      = irecv[0];
1445     info->nz_allocated = irecv[1];
1446     info->nz_unneeded  = irecv[2];
1447     info->memory       = irecv[3];
1448     info->mallocs      = irecv[4];
1449   } else if (flag == MAT_GLOBAL_SUM) {
1450     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1451 
1452     info->nz_used      = irecv[0];
1453     info->nz_allocated = irecv[1];
1454     info->nz_unneeded  = irecv[2];
1455     info->memory       = irecv[3];
1456     info->mallocs      = irecv[4];
1457   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1458   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1459   info->fill_ratio_needed = 0;
1460   info->factor_mallocs    = 0;
1461   PetscFunctionReturn(0);
1462 }
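
/* A hedged usage sketch (illustration only): querying the statistics summed
   across all processes after assembly.
.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz_used %g mallocs %g\n",
                        (double)info.nz_used,(double)info.mallocs);CHKERRQ(ierr);
.ve
*/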
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1466 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1467 {
1468   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1469   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1470   PetscErrorCode ierr;
1471 
1472   PetscFunctionBegin;
1473   switch (op) {
1474   case MAT_NEW_NONZERO_LOCATIONS:
1475   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1476   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1477   case MAT_KEEP_NONZERO_PATTERN:
1478   case MAT_NEW_NONZERO_LOCATION_ERR:
1479     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1480     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1481     break;
1482   case MAT_ROW_ORIENTED:
1483     a->roworiented = flg;
1484 
1485     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1486     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1487     break;
1488   case MAT_NEW_DIAGONALS:
1489     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1490     break;
1491   case MAT_IGNORE_OFF_PROC_ENTRIES:
1492     a->donotstash = flg;
1493     break;
1494   case MAT_USE_HASH_TABLE:
1495     a->ht_flag = flg;
1496     break;
1497   case MAT_HERMITIAN:
1498     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1499     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1500 
1501     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1502     break;
1503   case MAT_SPD:
1504     A->spd_set = PETSC_TRUE;
1505     A->spd     = flg;
1506     if (flg) {
1507       A->symmetric                  = PETSC_TRUE;
1508       A->structurally_symmetric     = PETSC_TRUE;
1509       A->symmetric_set              = PETSC_TRUE;
1510       A->structurally_symmetric_set = PETSC_TRUE;
1511     }
1512     break;
1513   case MAT_SYMMETRIC:
1514     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1515     break;
1516   case MAT_STRUCTURALLY_SYMMETRIC:
1517     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1518     break;
1519   case MAT_SYMMETRY_ETERNAL:
1520     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1521     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1522     break;
1523   case MAT_IGNORE_LOWER_TRIANGULAR:
1524     aA->ignore_ltriangular = flg;
1525     break;
1526   case MAT_ERROR_LOWER_TRIANGULAR:
1527     aA->ignore_ltriangular = flg;
1528     break;
1529   case MAT_GETROW_UPPERTRIANGULAR:
1530     aA->getrow_utriangular = flg;
1531     break;
1532   default:
1533     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1534   }
1535   PetscFunctionReturn(0);
1536 }
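
/* A hedged usage sketch (illustration only): selecting the Hermitian multiply
   kernel; per the check above this is only legal after assembly.
.vb
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
.ve
*/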
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1540 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1541 {
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1546     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1553 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1554 {
1555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1556   Mat            a     = baij->A, b=baij->B;
1557   PetscErrorCode ierr;
1558   PetscInt       nv,m,n;
1559   PetscBool      flg;
1560 
1561   PetscFunctionBegin;
1562   if (ll != rr) {
1563     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1564     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1565   }
1566   if (!ll) PetscFunctionReturn(0);
1567 
1568   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1569   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1570 
1571   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1572   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1573 
1574   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1575 
1576   /* left diagonalscale the off-diagonal part */
1577   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1578 
1579   /* scale the diagonal part */
1580   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1581 
1582   /* right diagonalscale the off-diagonal part */
1583   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
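
/* A hedged usage sketch (illustration only): symmetric scaling D*A*D; the
   same vector must be passed for the left and right scaling.
.vb
     Vec d;
     ierr = MatCreateVecs(mat,&d,NULL);CHKERRQ(ierr);
     ierr = VecSet(d,2.0);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
.ve
*/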
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1590 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1591 {
1592   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatEqual_MPISBAIJ"
1604 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1605 {
1606   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1607   Mat            a,b,c,d;
1608   PetscBool      flg;
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   a = matA->A; b = matA->B;
1613   c = matB->A; d = matB->B;
1614 
1615   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1616   if (flg) {
1617     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1618   }
1619   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatCopy_MPISBAIJ"
1625 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1626 {
1627   PetscErrorCode ierr;
1628   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1629   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1630 
1631   PetscFunctionBegin;
1632   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1633   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1634     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1635     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1636     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1637   } else {
1638     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1639     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1640   }
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1646 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1647 {
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1657 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1658 {
1659   PetscErrorCode ierr;
1660   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1661   PetscBLASInt   bnz,one=1;
1662   Mat_SeqSBAIJ   *xa,*ya;
1663   Mat_SeqBAIJ    *xb,*yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1669     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1670     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1671     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1672     xb   = (Mat_SeqBAIJ*)xx->B->data;
1673     yb   = (Mat_SeqBAIJ*)yy->B->data;
1674     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1675     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1676     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
1678     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1679     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1680     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1681   } else {
1682     Mat      B;
1683     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1684     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1685     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1686     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1687     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1688     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1689     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1690     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1691     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1692     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1693     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1694     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1695     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1696     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1697     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1698     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1699     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1700     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1701     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1702     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1703   }
1704   PetscFunctionReturn(0);
1705 }
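
/* A hedged usage sketch (illustration only): Y <- 2*X + Y; with
   DIFFERENT_NONZERO_PATTERN the branch above rebuilds Y's preallocation
   before performing the addition.
.vb
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
.ve
*/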
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1709 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1710 {
1711   PetscErrorCode ierr;
1712   PetscInt       i;
1713   PetscBool      flg,sorted;
1714 
1715   PetscFunctionBegin;
1716   for (i = 0; i < n; i++) {
1717     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1718     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
1719     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1720     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
1721   }
1722   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1723   for (i=0; i<n; i++) {
1724     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1725     if (!flg) { /* *B[i] is non-symmetric, set flag */
1726       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1727     }
1728   }
1729   PetscFunctionReturn(0);
1730 }
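
/* A hedged usage sketch (illustration only): the index sets must be sorted
   before extraction, e.g.
.vb
     Mat *submats;
     ierr = ISSort(irow[0]);CHKERRQ(ierr);
     ierr = ISSort(icol[0]);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(A,1,irow,icol,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
.ve
*/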
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatShift_MPISBAIJ"
1734 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1735 {
1736   PetscErrorCode ierr;
1737   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1738   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1739 
1740   PetscFunctionBegin;
1741   if (!Y->preallocated) {
1742     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1743   } else if (!aij->nz) {
1744     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1745   }
1746   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 /* -------------------------------------------------------------------*/
1751 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1752                                        MatGetRow_MPISBAIJ,
1753                                        MatRestoreRow_MPISBAIJ,
1754                                        MatMult_MPISBAIJ,
1755                                /*  4*/ MatMultAdd_MPISBAIJ,
1756                                        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones, since the matrix is symmetric */
1757                                        MatMultAdd_MPISBAIJ,
1758                                        0,
1759                                        0,
1760                                        0,
1761                                /* 10*/ 0,
1762                                        0,
1763                                        0,
1764                                        MatSOR_MPISBAIJ,
1765                                        MatTranspose_MPISBAIJ,
1766                                /* 15*/ MatGetInfo_MPISBAIJ,
1767                                        MatEqual_MPISBAIJ,
1768                                        MatGetDiagonal_MPISBAIJ,
1769                                        MatDiagonalScale_MPISBAIJ,
1770                                        MatNorm_MPISBAIJ,
1771                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1772                                        MatAssemblyEnd_MPISBAIJ,
1773                                        MatSetOption_MPISBAIJ,
1774                                        MatZeroEntries_MPISBAIJ,
1775                                /* 24*/ 0,
1776                                        0,
1777                                        0,
1778                                        0,
1779                                        0,
1780                                /* 29*/ MatSetUp_MPISBAIJ,
1781                                        0,
1782                                        0,
1783                                        0,
1784                                        0,
1785                                /* 34*/ MatDuplicate_MPISBAIJ,
1786                                        0,
1787                                        0,
1788                                        0,
1789                                        0,
1790                                /* 39*/ MatAXPY_MPISBAIJ,
1791                                        MatGetSubMatrices_MPISBAIJ,
1792                                        MatIncreaseOverlap_MPISBAIJ,
1793                                        MatGetValues_MPISBAIJ,
1794                                        MatCopy_MPISBAIJ,
1795                                /* 44*/ 0,
1796                                        MatScale_MPISBAIJ,
1797                                        MatShift_MPISBAIJ,
1798                                        0,
1799                                        0,
1800                                /* 49*/ 0,
1801                                        0,
1802                                        0,
1803                                        0,
1804                                        0,
1805                                /* 54*/ 0,
1806                                        0,
1807                                        MatSetUnfactored_MPISBAIJ,
1808                                        0,
1809                                        MatSetValuesBlocked_MPISBAIJ,
1810                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1811                                        0,
1812                                        0,
1813                                        0,
1814                                        0,
1815                                /* 64*/ 0,
1816                                        0,
1817                                        0,
1818                                        0,
1819                                        0,
1820                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1821                                        0,
1822                                        0,
1823                                        0,
1824                                        0,
1825                                /* 74*/ 0,
1826                                        0,
1827                                        0,
1828                                        0,
1829                                        0,
1830                                /* 79*/ 0,
1831                                        0,
1832                                        0,
1833                                        0,
1834                                        MatLoad_MPISBAIJ,
1835                                /* 84*/ 0,
1836                                        0,
1837                                        0,
1838                                        0,
1839                                        0,
1840                                /* 89*/ 0,
1841                                        0,
1842                                        0,
1843                                        0,
1844                                        0,
1845                                /* 94*/ 0,
1846                                        0,
1847                                        0,
1848                                        0,
1849                                        0,
1850                                /* 99*/ 0,
1851                                        0,
1852                                        0,
1853                                        0,
1854                                        0,
1855                                /*104*/ 0,
1856                                        MatRealPart_MPISBAIJ,
1857                                        MatImaginaryPart_MPISBAIJ,
1858                                        MatGetRowUpperTriangular_MPISBAIJ,
1859                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1860                                /*109*/ 0,
1861                                        0,
1862                                        0,
1863                                        0,
1864                                        0,
1865                                /*114*/ 0,
1866                                        0,
1867                                        0,
1868                                        0,
1869                                        0,
1870                                /*119*/ 0,
1871                                        0,
1872                                        0,
1873                                        0,
1874                                        0,
1875                                /*124*/ 0,
1876                                        0,
1877                                        0,
1878                                        0,
1879                                        0,
1880                                /*129*/ 0,
1881                                        0,
1882                                        0,
1883                                        0,
1884                                        0,
1885                                /*134*/ 0,
1886                                        0,
1887                                        0,
1888                                        0,
1889                                        0,
1890                                /*139*/ 0,
1891                                        0,
1892                                        0,
1893                                        0,
1894                                        0,
1895                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1896 };
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1900 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1901 {
1902   PetscFunctionBegin;
1903   *a = ((Mat_MPISBAIJ*)A->data)->A;
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1909 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1910 {
1911   Mat_MPISBAIJ   *b;
1912   PetscErrorCode ierr;
1913   PetscInt       i,mbs,Mbs;
1914 
1915   PetscFunctionBegin;
1916   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1917   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1918   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1919   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1920 
1921   b   = (Mat_MPISBAIJ*)B->data;
1922   mbs = B->rmap->n/bs;
1923   Mbs = B->rmap->N/bs;
1924   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1925 
1926   B->rmap->bs = bs;
1927   b->bs2      = bs*bs;
1928   b->mbs      = mbs;
1929   b->Mbs      = Mbs;
1930   b->nbs      = B->cmap->n/bs;
1931   b->Nbs      = B->cmap->N/bs;
1932 
1933   for (i=0; i<=b->size; i++) {
1934     b->rangebs[i] = B->rmap->range[i]/bs;
1935   }
1936   b->rstartbs = B->rmap->rstart/bs;
1937   b->rendbs   = B->rmap->rend/bs;
1938 
1939   b->cstartbs = B->cmap->rstart/bs;
1940   b->cendbs   = B->cmap->rend/bs;
1941 
1942   if (!B->preallocated) {
1943     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1944     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1945     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1946     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1947     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1948     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1949     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1950     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1951     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1952   }
1953 
1954   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1955   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1956 
1957   B->preallocated = PETSC_TRUE;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1963 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1964 {
1965   PetscInt       m,rstart,cstart,cend;
1966   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1967   const PetscInt *JJ    =0;
1968   PetscScalar    *values=0;
1969   PetscErrorCode ierr;
1970 
1971   PetscFunctionBegin;
1972   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1973   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1974   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1975   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1976   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1977   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1978   m      = B->rmap->n/bs;
1979   rstart = B->rmap->rstart/bs;
1980   cstart = B->cmap->rstart/bs;
1981   cend   = B->cmap->rend/bs;
1982 
1983   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1984   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1985   for (i=0; i<m; i++) {
1986     nz = ii[i+1] - ii[i];
1987     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1988     nz_max = PetscMax(nz_max,nz);
1989     JJ     = jj + ii[i];
1990     for (j=0; j<nz; j++) {
1991       if (*JJ >= cstart) break;
1992       JJ++;
1993     }
1994     d = 0;
1995     for (; j<nz; j++) {
1996       if (*JJ++ >= cend) break;
1997       d++;
1998     }
1999     d_nnz[i] = d;
2000     o_nnz[i] = nz - d;
2001   }
2002   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2003   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2004 
2005   values = (PetscScalar*)V;
2006   if (!values) {
2007     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2008     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2009   }
2010   for (i=0; i<m; i++) {
2011     PetscInt          row    = i + rstart;
2012     PetscInt          ncols  = ii[i+1] - ii[i];
2013     const PetscInt    *icols = jj + ii[i];
2014     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2015     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2016   }
2017 
2018   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2019   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2020   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2021   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
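
/* A hedged usage sketch (illustration only): block-CSR preallocation for one
   process owning two block rows with bs=2; block row 0 has blocks in block
   columns {0,1} and block row 1 has a block in column {1} (upper triangle):
.vb
     PetscInt ii[] = {0,2,3};
     PetscInt jj[] = {0,1,1};
     ierr = MatMPISBAIJSetPreallocationCSR(B,2,ii,jj,NULL);CHKERRQ(ierr);
.ve
   Passing NULL for V, as the code above shows, inserts zeros for the listed
   blocks. */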
2024 
2025 /*MC
2026    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2027    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2028    the matrix is stored.
2029 
2030   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2031   can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE).
2032 
2033    Options Database Keys:
2034 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2035 
2036   Level: beginner
2037 
2038 .seealso: MatCreateMPISBAIJ
2039 M*/
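
/* A hedged usage sketch (illustration only): a typical creation sequence;
   only entries with global column >= global row need to be set.
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
*/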
2040 
2041 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2042 
2043 #undef __FUNCT__
2044 #define __FUNCT__ "MatCreate_MPISBAIJ"
2045 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2046 {
2047   Mat_MPISBAIJ   *b;
2048   PetscErrorCode ierr;
2049   PetscBool      flg = PETSC_FALSE;
2050 
2051   PetscFunctionBegin;
2052   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2053   B->data = (void*)b;
2054   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2055 
2056   B->ops->destroy = MatDestroy_MPISBAIJ;
2057   B->ops->view    = MatView_MPISBAIJ;
2058   B->assembled    = PETSC_FALSE;
2059   B->insertmode   = NOT_SET_VALUES;
2060 
2061   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2062   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2063 
2064   /* build local table of row and column ownerships */
2065   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2066 
2067   /* build cache for off array entries formed */
2068   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2069 
2070   b->donotstash  = PETSC_FALSE;
2071   b->colmap      = NULL;
2072   b->garray      = NULL;
2073   b->roworiented = PETSC_TRUE;
2074 
2075   /* stuff used in block assembly */
2076   b->barray = 0;
2077 
2078   /* stuff used for matrix vector multiply */
2079   b->lvec    = 0;
2080   b->Mvctx   = 0;
2081   b->slvec0  = 0;
2082   b->slvec0b = 0;
2083   b->slvec1  = 0;
2084   b->slvec1a = 0;
2085   b->slvec1b = 0;
2086   b->sMvctx  = 0;
2087 
2088   /* stuff for MatGetRow() */
2089   b->rowindices   = 0;
2090   b->rowvalues    = 0;
2091   b->getrowactive = PETSC_FALSE;
2092 
2093   /* hash table stuff */
2094   b->ht           = 0;
2095   b->hd           = 0;
2096   b->ht_size      = 0;
2097   b->ht_flag      = PETSC_FALSE;
2098   b->ht_fact      = 0;
2099   b->ht_total_ct  = 0;
2100   b->ht_insert_ct = 0;
2101 
2102   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2103   b->ijonly = PETSC_FALSE;
2104 
2105   b->in_loc = 0;
2106   b->v_loc  = 0;
2107   b->n_loc  = 0;
2108 
2109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
2115 #if defined(PETSC_HAVE_ELEMENTAL)
2116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2117 #endif
2118 
2119   B->symmetric                  = PETSC_TRUE;
2120   B->structurally_symmetric     = PETSC_TRUE;
2121   B->symmetric_set              = PETSC_TRUE;
2122   B->structurally_symmetric_set = PETSC_TRUE;
2123 
2124   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2125   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2126   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2127   if (flg) {
2128     PetscReal fact = 1.39;
2129     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2130     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2131     if (fact <= 1.0) fact = 1.39;
2132     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2133     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2134   }
2135   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 /*MC
2140    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2141 
2142    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2143    and MATMPISBAIJ otherwise.
2144 
2145    Options Database Keys:
2146 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2147 
2148   Level: beginner
2149 
2150 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2151 M*/
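
/* A hedged usage sketch (illustration only): deferring the type choice to the
   options database, e.g. with -mat_type sbaij on the command line.
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
.ve
*/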
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2155 /*@C
2156    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2157    the user should preallocate the matrix storage by setting the parameters
2158    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2159    performance can be increased by more than a factor of 50.
2160 
2161    Collective on Mat
2162 
2163    Input Parameters:
2164 +  B - the matrix
2165 .  bs   - size of the blocks; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set different row and column blocksizes, but the row
2166           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2167 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2168            submatrix  (same for all local rows)
2169 .  d_nnz - array containing the number of block nonzeros in the various block rows
2170            in the upper triangular and diagonal part of the diagonal portion of the local
2171            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2172            for the diagonal entry and set a value even if it is zero.
2173 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2174            submatrix (same for all local rows).
2175 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2176            off-diagonal portion of the local submatrix that is right of the diagonal
2177            (possibly different for each block row) or NULL.
2178 
2179 
2180    Options Database Keys:
2181 .   -mat_no_unroll - uses code that does not unroll the loops in the
2182                      block calculations (much slower)
2183 .   -mat_block_size - size of the blocks to use
2184 
2185    Notes:
2186 
2187    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2188    then it must be used on all processors that share the object for that argument.
2189 
2190    If the *_nnz parameter is given then the *_nz parameter is ignored
2191 
2192    Storage Information:
2193    For a square global matrix we define each processor's diagonal portion
2194    to be its local rows and the corresponding columns (a square submatrix);
2195    each processor's off-diagonal portion encompasses the remainder of the
2196    local matrix (a rectangular submatrix).
2197 
2198    The user can specify preallocated storage for the diagonal part of
2199    the local submatrix with either d_nz or d_nnz (not both).  Set
2200    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2201    memory allocation.  Likewise, specify preallocated storage for the
2202    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2203 
2204    You can call MatGetInfo() to get information on how effective the preallocation was;
2205    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2206    You can also run with the option -info and look for messages with the string
2207    malloc in them to see if additional memory allocation was needed.
2208 
2209    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2210    the figure below we depict these three local rows and all columns (0-11).
2211 
2212 .vb
2213            0 1 2 3 4 5 6 7 8 9 10 11
2214           --------------------------
2215    row 3  |. . . d d d o o o o  o  o
2216    row 4  |. . . d d d o o o o  o  o
2217    row 5  |. . . d d d o o o o  o  o
2218           --------------------------
2219 .ve
2220 
2221    Thus, any entries in the d locations are stored in the d (diagonal)
2222    submatrix, and any entries in the o locations are stored in the
2223    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2224    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2225 
2226    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2227    plus the diagonal part of the d matrix,
2228    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2229 
2230    In general, for PDE problems in which most nonzeros are near the diagonal,
2231    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2232    or you will get TERRIBLE performance; see the users' manual chapter on
2233    matrices.
2234 
2235    Level: intermediate
2236 
2237 .keywords: matrix, block, aij, compressed row, sparse, parallel
2238 
2239 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2240 @*/
2241 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2242 {
2243   PetscErrorCode ierr;
2244 
2245   PetscFunctionBegin;
2246   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2247   PetscValidType(B,1);
2248   PetscValidLogicalCollectiveInt(B,bs,2);
2249   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
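
/* A hedged worked example (illustration only): for the rows 3-5 depicted in
   the manual page above with bs=1, only entries on or right of the diagonal
   of the d block are stored (3, 2, and 1 entries respectively), so a suitable
   preallocation would be
.vb
     PetscInt d_nnz[] = {3,2,1};
     PetscInt o_nnz[] = {6,6,6};
     ierr = MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/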
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatCreateSBAIJ"
2255 /*@C
2256    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2257    (block compressed row).  For good matrix assembly performance
2258    the user should preallocate the matrix storage by setting the parameters
2259    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2260    performance can be increased by more than a factor of 50.
2261 
2262    Collective on MPI_Comm
2263 
2264    Input Parameters:
2265 +  comm - MPI communicator
2266 .  bs   - size of the blocks; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set different row and column blocksizes, but the row
2267           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2268 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2269            This value should be the same as the local size used in creating the
2270            y vector for the matrix-vector product y = Ax.
2271 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2272            This value should be the same as the local size used in creating the
2273            x vector for the matrix-vector product y = Ax.
2274 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2275 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2276 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2277            submatrix  (same for all local rows)
2278 .  d_nnz - array containing the number of block nonzeros in the various block rows
2279            in the upper triangular portion of the diagonal portion of the local
2280            submatrix (possibly different for each block row) or NULL.
2281            If you plan to factor the matrix you must leave room for the diagonal entry and
2282            set its value even if it is zero.
2283 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2284            submatrix (same for all local rows).
2285 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2286            off-diagonal portion of the local submatrix (possibly different for
2287            each block row) or NULL.
2288 
2289    Output Parameter:
2290 .  A - the matrix
2291 
2292    Options Database Keys:
2293 .   -mat_no_unroll - uses code that does not unroll the loops in the
2294                      block calculations (much slower)
2295 .   -mat_block_size - size of the blocks to use
2296 .   -mat_mpi - use the parallel matrix data structures even on one processor
2297                (defaults to using SeqSBAIJ format on one processor)
2298 
2299    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2300    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2301    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2302 
2303    Notes:
2304    The number of rows and columns must be divisible by blocksize.
2305    This matrix type does not support complex Hermitian operation.
2306 
2307    The user MUST specify either the local or global matrix dimensions
2308    (possibly both).
2309 
2310    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2311    then it must be used on all processors that share the object for that argument.
2312 
2313    If the *_nnz parameter is given then the *_nz parameter is ignored
2314 
2315    Storage Information:
2316    For a square global matrix we define each processor's diagonal portion
2317    to be its local rows and the corresponding columns (a square submatrix);
2318    each processor's off-diagonal portion encompasses the remainder of the
2319    local matrix (a rectangular submatrix).
2320 
2321    The user can specify preallocated storage for the diagonal part of
2322    the local submatrix with either d_nz or d_nnz (not both).  Set
2323    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2324    memory allocation.  Likewise, specify preallocated storage for the
2325    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2326 
2327    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2328    the figure below we depict these three local rows and all columns (0-11).
2329 
2330 .vb
2331            0 1 2 3 4 5 6 7 8 9 10 11
2332           --------------------------
2333    row 3  |. . . d d d o o o o  o  o
2334    row 4  |. . . d d d o o o o  o  o
2335    row 5  |. . . d d d o o o o  o  o
2336           --------------------------
2337 .ve
2338 
2339    Thus, any entries in the d locations are stored in the d (diagonal)
2340    submatrix, and any entries in the o locations are stored in the
2341    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2342    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2343 
2344    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2345    plus the diagonal part of the d matrix,
2346    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2347    In general, for PDE problems in which most nonzeros are near the diagonal,
2348    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2349    or you will get TERRIBLE performance; see the users' manual chapter on
2350    matrices.
2351 
2352    Level: intermediate
2353 
2354 .keywords: matrix, block, aij, compressed row, sparse, parallel
2355 
2356 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2357 @*/
2358 
2359 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2360 {
2361   PetscErrorCode ierr;
2362   PetscMPIInt    size;
2363 
2364   PetscFunctionBegin;
2365   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2366   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2367   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2368   if (size > 1) {
2369     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2370     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2371   } else {
2372     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2373     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2374   }
2375   PetscFunctionReturn(0);
2376 }
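
/* A hedged usage sketch (illustration only): the one-call creation route with
   uniform per-row preallocation estimates.
.vb
     Mat A;
     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                           5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve
   As noted above, the MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   sequence is the recommended alternative. */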
2377 
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2381 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2382 {
2383   Mat            mat;
2384   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2385   PetscErrorCode ierr;
2386   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2387   PetscScalar    *array;
2388 
2389   PetscFunctionBegin;
2390   *newmat = 0;
2391 
2392   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2393   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2394   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2395   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2396   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2397   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2398 
2399   mat->factortype   = matin->factortype;
2400   mat->preallocated = PETSC_TRUE;
2401   mat->assembled    = PETSC_TRUE;
2402   mat->insertmode   = NOT_SET_VALUES;
2403 
2404   a      = (Mat_MPISBAIJ*)mat->data;
2405   a->bs2 = oldmat->bs2;
2406   a->mbs = oldmat->mbs;
2407   a->nbs = oldmat->nbs;
2408   a->Mbs = oldmat->Mbs;
2409   a->Nbs = oldmat->Nbs;
2410 
2411 
2412   a->size         = oldmat->size;
2413   a->rank         = oldmat->rank;
2414   a->donotstash   = oldmat->donotstash;
2415   a->roworiented  = oldmat->roworiented;
2416   a->rowindices   = 0;
2417   a->rowvalues    = 0;
2418   a->getrowactive = PETSC_FALSE;
2419   a->barray       = 0;
2420   a->rstartbs     = oldmat->rstartbs;
2421   a->rendbs       = oldmat->rendbs;
2422   a->cstartbs     = oldmat->cstartbs;
2423   a->cendbs       = oldmat->cendbs;
2424 
2425   /* hash table stuff */
2426   a->ht           = 0;
2427   a->hd           = 0;
2428   a->ht_size      = 0;
2429   a->ht_flag      = oldmat->ht_flag;
2430   a->ht_fact      = oldmat->ht_fact;
2431   a->ht_total_ct  = 0;
2432   a->ht_insert_ct = 0;
2433 
2434   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2435   if (oldmat->colmap) {
2436 #if defined(PETSC_USE_CTABLE)
2437     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2438 #else
2439     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2440     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2441     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2442 #endif
2443   } else a->colmap = 0;
2444 
2445   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2446     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2447     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2448     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2449   } else a->garray = 0;
2450 
2451   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2452   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2453   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2454   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2455   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2456 
2457   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2458   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2459   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2460   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2461 
2462   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2463   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2464   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2465   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2466   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2467   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2468   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2469   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2470   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2471   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2472   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2473   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2474   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2475 
2476   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2477   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2478   a->sMvctx = oldmat->sMvctx;
2479   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2480 
2481   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2482   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2483   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2484   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2485   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2486   *newmat = mat;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatLoad_MPISBAIJ"
2492 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2493 {
2494   PetscErrorCode ierr;
2495   PetscInt       i,nz,j,rstart,rend;
2496   PetscScalar    *vals,*buf;
2497   MPI_Comm       comm;
2498   MPI_Status     status;
2499   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2500   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2501   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2502   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2503   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2504   PetscInt       dcount,kmax,k,nzcount,tmp;
2505   int            fd;
2506 
2507   PetscFunctionBegin;
2508   /* force binary viewer to load .info file if it has not yet done so */
2509   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2510   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2511   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2512   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2513   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2514   if (bs < 0) bs = 1;
2515 
2516   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2517   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2518   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2519   if (!rank) {
2520     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2521     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2522     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2523   }
2524 
2525   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2526   M    = header[1];
2527   N    = header[2];
2528 
2529   /* If global sizes are set, check if they are consistent with that given in the file */
2530   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2531   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2532 
2533   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2534 
2535   /*
2536      This code adds extra rows to make sure the number of rows is
2537      divisible by the blocksize
2538   */
2539   Mbs        = M/bs;
2540   extra_rows = bs - M + bs*(Mbs);
2541   if (extra_rows == bs) extra_rows = 0;
2542   else                  Mbs++;
2543   if (extra_rows && !rank) {
2544     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2545   }
2546 
2547   /* determine ownership of all rows */
2548   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2549     mbs = Mbs/size + ((Mbs % size) > rank);
2550     m   = mbs*bs;
2551   } else { /* User Set */
2552     m   = newmat->rmap->n;
2553     mbs = m/bs;
2554   }
2555   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2556   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2557   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2558   rowners[0] = 0;
2559   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2560   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2561   rstart = rowners[rank];
2562   rend   = rowners[rank+1];
2563 
2564   /* distribute row lengths to all processors */
2565   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2566   if (!rank) {
2567     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2568     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2569     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2570     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2571     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2572     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2573     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2574   } else {
2575     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2576   }
2577 
  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the parts for all the other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
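
/*
   A minimal usage sketch (assumed driver code, not part of this file): loading a
   MATMPISBAIJ matrix from a PETSc binary file previously written with MatView();
   the file name "matrix.dat" is hypothetical, and the block size can be selected
   at load time with the run-time option -matload_block_size <bs>.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/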

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective on Mat, each process can have a different hash factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/
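
/*
   A sketch (assumed usage, not from this file) of enabling the hash-table insertion
   path for which this factor is used; the factor itself is set from the command
   line, e.g. -mat_use_hash_table 1.6:

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
*/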

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;             /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
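
/*
   A minimal sketch (assumed driver code, not part of this file) of computing the
   row maxima through the public interface, which dispatches here for MATMPISBAIJ;
   note idx must be NULL for this implementation:

     Vec v;

     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/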

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
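
/*
   A usage sketch (assumed driver code, not from this file): MatSOR() is normally
   reached through PCSOR inside a Richardson iteration, e.g.

     ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or from the command line with -ksp_type richardson -pc_type sor; note that this
   implementation requires block size 1.
*/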

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
         in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
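
/*
   A minimal sketch (assumed driver code, not part of this file): a 2x2 symmetric
   matrix [2 1; 1 3], stored as its upper triangle in CSR on a single process;
   only bs = 1 is supported.

     Mat         A;
     PetscInt    ia[] = {0,2,3};            row offsets
     PetscInt    ja[] = {0,1,1};            column indices of the upper triangle
     PetscScalar va[] = {2.0,1.0,3.0};      values

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
*/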

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format,
   using the given nonzero structure and (optional) numerical values

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
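
/*
   A sketch of the assumed calling sequence around this routine (the names m, n, bs,
   i, j, v are placeholders):

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
*/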

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,mbs,Nbs;   /* bs and cbs from the enclosing scope are set below */
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",sum,Nbs);

    ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
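
/*
   A usage sketch (assumed driver code, not from this file): this routine is reached
   through the generic MatCreateMPIMatConcatenateSeqMat() interface, which stacks a
   sequential SBAIJ matrix from each process into one parallel matrix:

     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqsbaij,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpisbaij);CHKERRQ(ierr);
*/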