xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7ea3e4caddcdc8968ce8b58701132bea2835aba8)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
13 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
14 {
15   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
16   PetscErrorCode ierr;
17   PetscInt       i,*idxb = 0;
18   PetscScalar    *va,*vb;
19   Vec            vtmp;
20 
21   PetscFunctionBegin;
22   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
23   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
24   if (idx) {
25     for (i=0; i<A->rmap->n; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
31   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
32   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
33   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
34 
35   for (i=0; i<A->rmap->n; i++) {
36     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
37       va[i] = vb[i];
38       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
39     }
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   ierr = PetscFree(idxb);CHKERRQ(ierr);
45   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
51 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
52 {
53   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
54   PetscErrorCode ierr;
55 
56   PetscFunctionBegin;
57   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
58   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
62 #undef __FUNCT__
63 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
64 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
65 {
66   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
67   PetscErrorCode ierr;
68 
69   PetscFunctionBegin;
70   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
71   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non scalable way since the
79    length of colmap equals the global matrix length.
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
83 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
84 {
85   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
86   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
87   PetscErrorCode ierr;
88   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
89 
90   PetscFunctionBegin;
91 #if defined(PETSC_USE_CTABLE)
92   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
93   for (i=0; i<nbs; i++) {
94     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
95   }
96 #else
97   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
98   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
99   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
101 #endif
102   PetscFunctionReturn(0);
103 }
104 
105 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
106   { \
107  \
108     brow = row/bs;  \
109     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
110     rmax = aimax[brow]; nrow = ailen[brow]; \
111     bcol = col/bs; \
112     ridx = row % bs; cidx = col % bs; \
113     low  = 0; high = nrow; \
114     while (high-low > 3) { \
115       t = (low+high)/2; \
116       if (rp[t] > bcol) high = t; \
117       else              low  = t; \
118     } \
119     for (_i=low; _i<high; _i++) { \
120       if (rp[_i] > bcol) break; \
121       if (rp[_i] == bcol) { \
122         bap = ap +  bs2*_i + bs*cidx + ridx; \
123         if (addv == ADD_VALUES) *bap += value;  \
124         else                    *bap  = value;  \
125         goto a_noinsert; \
126       } \
127     } \
128     if (a->nonew == 1) goto a_noinsert; \
129     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
130     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
131     N = nrow++ - 1;  \
132     /* shift up all the later entries in this row */ \
133     for (ii=N; ii>=_i; ii--) { \
134       rp[ii+1] = rp[ii]; \
135       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
136     } \
137     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
138     rp[_i]                      = bcol;  \
139     ap[bs2*_i + bs*cidx + ridx] = value;  \
140 a_noinsert:; \
141     ailen[brow] = nrow; \
142   }
143 
144 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
145   { \
146     brow = row/bs;  \
147     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
148     rmax = bimax[brow]; nrow = bilen[brow]; \
149     bcol = col/bs; \
150     ridx = row % bs; cidx = col % bs; \
151     low  = 0; high = nrow; \
152     while (high-low > 3) { \
153       t = (low+high)/2; \
154       if (rp[t] > bcol) high = t; \
155       else              low  = t; \
156     } \
157     for (_i=low; _i<high; _i++) { \
158       if (rp[_i] > bcol) break; \
159       if (rp[_i] == bcol) { \
160         bap = ap +  bs2*_i + bs*cidx + ridx; \
161         if (addv == ADD_VALUES) *bap += value;  \
162         else                    *bap  = value;  \
163         goto b_noinsert; \
164       } \
165     } \
166     if (b->nonew == 1) goto b_noinsert; \
167     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
168     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
169     N = nrow++ - 1;  \
170     /* shift up all the later entries in this row */ \
171     for (ii=N; ii>=_i; ii--) { \
172       rp[ii+1] = rp[ii]; \
173       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174     } \
175     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
176     rp[_i]                      = bcol;  \
177     ap[bs2*_i + bs*cidx + ridx] = value;  \
178 b_noinsert:; \
179     bilen[brow] = nrow; \
180   }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatSetValues_MPIBAIJ"
184 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
185 {
186   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
187   MatScalar      value;
188   PetscBool      roworiented = baij->roworiented;
189   PetscErrorCode ierr;
190   PetscInt       i,j,row,col;
191   PetscInt       rstart_orig=mat->rmap->rstart;
192   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
193   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
194 
195   /* Some Variables required in the macro */
196   Mat         A     = baij->A;
197   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
198   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
199   MatScalar   *aa   =a->a;
200 
201   Mat         B     = baij->B;
202   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
203   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
204   MatScalar   *ba   =b->a;
205 
206   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
207   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
208   MatScalar *ap,*bap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_DEBUG)
214     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
215 #endif
216     if (im[i] >= rstart_orig && im[i] < rend_orig) {
217       row = im[i] - rstart_orig;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart_orig && in[j] < cend_orig) {
220           col = in[j] - cstart_orig;
221           if (roworiented) value = v[i*n+j];
222           else             value = v[i+j*m];
223           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
224           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_DEBUG)
227         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!baij->colmap) {
232               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined(PETSC_USE_CTABLE)
235             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
236             col  = col - 1;
237 #else
238             col = baij->colmap[in[j]/bs] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
241               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
242               col  =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
244               B    = baij->B;
245               b    = (Mat_SeqBAIJ*)(B)->data;
246               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
247               ba   =b->a;
248             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
249             else col += in[j]%bs;
250           } else col = in[j];
251           if (roworiented) value = v[i*n+j];
252           else             value = v[i+j*m];
253           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
254           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
255         }
256       }
257     } else {
258       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
259       if (!baij->donotstash) {
260         mat->assembled = PETSC_FALSE;
261         if (roworiented) {
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
263         } else {
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
274 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
275 {
276   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
277   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
278   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
279   PetscErrorCode    ierr;
280   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
281   PetscBool         roworiented=a->roworiented;
282   const PetscScalar *value     = v;
283   MatScalar         *ap,*aa = a->a,*bap;
284 
285   PetscFunctionBegin;
286   rp   = aj + ai[row];
287   ap   = aa + bs2*ai[row];
288   rmax = imax[row];
289   nrow = ailen[row];
290   value = v;
291   low = 0;
292   high = nrow;
293   while (high-low > 7) {
294     t = (low+high)/2;
295     if (rp[t] > col) high = t;
296     else             low  = t;
297   }
298   for (i=low; i<high; i++) {
299     if (rp[i] > col) break;
300     if (rp[i] == col) {
301       bap = ap +  bs2*i;
302       if (roworiented) {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++) {
305             for (jj=ii; jj<bs2; jj+=bs) {
306               bap[jj] += *value++;
307             }
308           }
309         } else {
310           for (ii=0; ii<bs; ii++) {
311             for (jj=ii; jj<bs2; jj+=bs) {
312               bap[jj] = *value++;
313             }
314           }
315         }
316       } else {
317         if (is == ADD_VALUES) {
318           for (ii=0; ii<bs; ii++,value+=bs) {
319             for (jj=0; jj<bs; jj++) {
320               bap[jj] += value[jj];
321             }
322             bap += bs;
323           }
324         } else {
325           for (ii=0; ii<bs; ii++,value+=bs) {
326             for (jj=0; jj<bs; jj++) {
327               bap[jj]  = value[jj];
328             }
329             bap += bs;
330           }
331         }
332       }
333       goto noinsert2;
334     }
335   }
336   if (nonew == 1) goto noinsert2;
337   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
338   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
339   N = nrow++ - 1; high++;
340   /* shift up all the later entries in this row */
341   for (ii=N; ii>=i; ii--) {
342     rp[ii+1] = rp[ii];
343     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
344   }
345   if (N >= i) {
346     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
347   }
348   rp[i] = col;
349   bap   = ap +  bs2*i;
350   if (roworiented) {
351     for (ii=0; ii<bs; ii++) {
352       for (jj=ii; jj<bs2; jj+=bs) {
353         bap[jj] = *value++;
354       }
355     }
356   } else {
357     for (ii=0; ii<bs; ii++) {
358       for (jj=0; jj<bs; jj++) {
359         *bap++ = *value++;
360       }
361     }
362   }
363   noinsert2:;
364   ailen[row] = nrow;
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
370 /*
371     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
372     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
373 */
374 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
375 {
376   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
377   const PetscScalar *value;
378   MatScalar         *barray     = baij->barray;
379   PetscBool         roworiented = baij->roworiented;
380   PetscErrorCode    ierr;
381   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
382   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
383   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
384 
385   PetscFunctionBegin;
386   if (!barray) {
387     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
388     baij->barray = barray;
389   }
390 
391   if (roworiented) stepval = (n-1)*bs;
392   else stepval = (m-1)*bs;
393 
394   for (i=0; i<m; i++) {
395     if (im[i] < 0) continue;
396 #if defined(PETSC_USE_DEBUG)
397     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
398 #endif
399     if (im[i] >= rstart && im[i] < rend) {
400       row = im[i] - rstart;
401       for (j=0; j<n; j++) {
402         /* If NumCol = 1 then a copy is not required */
403         if ((roworiented) && (n == 1)) {
404           barray = (MatScalar*)v + i*bs2;
405         } else if ((!roworiented) && (m == 1)) {
406           barray = (MatScalar*)v + j*bs2;
407         } else { /* Here a copy is required */
408           if (roworiented) {
409             value = v + (i*(stepval+bs) + j)*bs;
410           } else {
411             value = v + (j*(stepval+bs) + i)*bs;
412           }
413           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
414             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
415             barray += bs;
416           }
417           barray -= bs2;
418         }
419 
420         if (in[j] >= cstart && in[j] < cend) {
421           col  = in[j] - cstart;
422           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
423         } else if (in[j] < 0) continue;
424 #if defined(PETSC_USE_DEBUG)
425         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
426 #endif
427         else {
428           if (mat->was_assembled) {
429             if (!baij->colmap) {
430               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
431             }
432 
433 #if defined(PETSC_USE_DEBUG)
434 #if defined(PETSC_USE_CTABLE)
435             { PetscInt data;
436               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
437               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
438             }
439 #else
440             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
441 #endif
442 #endif
443 #if defined(PETSC_USE_CTABLE)
444             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
445             col  = (col - 1)/bs;
446 #else
447             col = (baij->colmap[in[j]] - 1)/bs;
448 #endif
449             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
450               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
451               col  =  in[j];
452             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
453           } else col = in[j];
454           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
455         }
456       }
457     } else {
458       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
459       if (!baij->donotstash) {
460         if (roworiented) {
461           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
462         } else {
463           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
464         }
465       }
466     }
467   }
468   PetscFunctionReturn(0);
469 }
470 
471 #define HASH_KEY 0.6180339887
472 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
473 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
474 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
475 #undef __FUNCT__
476 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
477 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
478 {
479   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
480   PetscBool      roworiented = baij->roworiented;
481   PetscErrorCode ierr;
482   PetscInt       i,j,row,col;
483   PetscInt       rstart_orig=mat->rmap->rstart;
484   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
485   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
486   PetscReal      tmp;
487   MatScalar      **HD = baij->hd,value;
488 #if defined(PETSC_USE_DEBUG)
489   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
490 #endif
491 
492   PetscFunctionBegin;
493   for (i=0; i<m; i++) {
494 #if defined(PETSC_USE_DEBUG)
495     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
496     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
497 #endif
498     row = im[i];
499     if (row >= rstart_orig && row < rend_orig) {
500       for (j=0; j<n; j++) {
501         col = in[j];
502         if (roworiented) value = v[i*n+j];
503         else             value = v[i+j*m];
504         /* Look up PetscInto the Hash Table */
505         key = (row/bs)*Nbs+(col/bs)+1;
506         h1  = HASH(size,key,tmp);
507 
508 
509         idx = h1;
510 #if defined(PETSC_USE_DEBUG)
511         insert_ct++;
512         total_ct++;
513         if (HT[idx] != key) {
514           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
515           if (idx == size) {
516             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
517             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
518           }
519         }
520 #else
521         if (HT[idx] != key) {
522           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
523           if (idx == size) {
524             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
525             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
526           }
527         }
528 #endif
529         /* A HASH table entry is found, so insert the values at the correct address */
530         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
531         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
532       }
533     } else if (!baij->donotstash) {
534       if (roworiented) {
535         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
536       } else {
537         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
538       }
539     }
540   }
541 #if defined(PETSC_USE_DEBUG)
542   baij->ht_total_ct  += total_ct;
543   baij->ht_insert_ct += insert_ct;
544 #endif
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
550 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
551 {
552   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
553   PetscBool         roworiented = baij->roworiented;
554   PetscErrorCode    ierr;
555   PetscInt          i,j,ii,jj,row,col;
556   PetscInt          rstart=baij->rstartbs;
557   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
558   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
559   PetscReal         tmp;
560   MatScalar         **HD = baij->hd,*baij_a;
561   const PetscScalar *v_t,*value;
562 #if defined(PETSC_USE_DEBUG)
563   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
564 #endif
565 
566   PetscFunctionBegin;
567   if (roworiented) stepval = (n-1)*bs;
568   else stepval = (m-1)*bs;
569 
570   for (i=0; i<m; i++) {
571 #if defined(PETSC_USE_DEBUG)
572     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
573     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
574 #endif
575     row = im[i];
576     v_t = v + i*nbs2;
577     if (row >= rstart && row < rend) {
578       for (j=0; j<n; j++) {
579         col = in[j];
580 
581         /* Look up into the Hash Table */
582         key = row*Nbs+col+1;
583         h1  = HASH(size,key,tmp);
584 
585         idx = h1;
586 #if defined(PETSC_USE_DEBUG)
587         total_ct++;
588         insert_ct++;
589         if (HT[idx] != key) {
590           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
591           if (idx == size) {
592             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
593             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
594           }
595         }
596 #else
597         if (HT[idx] != key) {
598           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
599           if (idx == size) {
600             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
601             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
602           }
603         }
604 #endif
605         baij_a = HD[idx];
606         if (roworiented) {
607           /*value = v + i*(stepval+bs)*bs + j*bs;*/
608           /* value = v + (i*(stepval+bs)+j)*bs; */
609           value = v_t;
610           v_t  += bs;
611           if (addv == ADD_VALUES) {
612             for (ii=0; ii<bs; ii++,value+=stepval) {
613               for (jj=ii; jj<bs2; jj+=bs) {
614                 baij_a[jj] += *value++;
615               }
616             }
617           } else {
618             for (ii=0; ii<bs; ii++,value+=stepval) {
619               for (jj=ii; jj<bs2; jj+=bs) {
620                 baij_a[jj] = *value++;
621               }
622             }
623           }
624         } else {
625           value = v + j*(stepval+bs)*bs + i*bs;
626           if (addv == ADD_VALUES) {
627             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
628               for (jj=0; jj<bs; jj++) {
629                 baij_a[jj] += *value++;
630               }
631             }
632           } else {
633             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
634               for (jj=0; jj<bs; jj++) {
635                 baij_a[jj] = *value++;
636               }
637             }
638           }
639         }
640       }
641     } else {
642       if (!baij->donotstash) {
643         if (roworiented) {
644           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
645         } else {
646           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
647         }
648       }
649     }
650   }
651 #if defined(PETSC_USE_DEBUG)
652   baij->ht_total_ct  += total_ct;
653   baij->ht_insert_ct += insert_ct;
654 #endif
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "MatGetValues_MPIBAIJ"
660 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
661 {
662   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
663   PetscErrorCode ierr;
664   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
665   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
666 
667   PetscFunctionBegin;
668   for (i=0; i<m; i++) {
669     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
670     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
671     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
672       row = idxm[i] - bsrstart;
673       for (j=0; j<n; j++) {
674         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
675         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
676         if (idxn[j] >= bscstart && idxn[j] < bscend) {
677           col  = idxn[j] - bscstart;
678           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
679         } else {
680           if (!baij->colmap) {
681             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
682           }
683 #if defined(PETSC_USE_CTABLE)
684           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
685           data--;
686 #else
687           data = baij->colmap[idxn[j]/bs]-1;
688 #endif
689           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
690           else {
691             col  = data + idxn[j]%bs;
692             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
693           }
694         }
695       }
696     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatNorm_MPIBAIJ"
703 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
704 {
705   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
706   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
707   PetscErrorCode ierr;
708   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
709   PetscReal      sum = 0.0;
710   MatScalar      *v;
711 
712   PetscFunctionBegin;
713   if (baij->size == 1) {
714     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
715   } else {
716     if (type == NORM_FROBENIUS) {
717       v  = amat->a;
718       nz = amat->nz*bs2;
719       for (i=0; i<nz; i++) {
720         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
721       }
722       v  = bmat->a;
723       nz = bmat->nz*bs2;
724       for (i=0; i<nz; i++) {
725         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
726       }
727       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
728       *nrm = PetscSqrtReal(*nrm);
729     } else if (type == NORM_1) { /* max column sum */
730       PetscReal *tmp,*tmp2;
731       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
732       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
733       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
734       v    = amat->a; jj = amat->j;
735       for (i=0; i<amat->nz; i++) {
736         for (j=0; j<bs; j++) {
737           col = bs*(cstart + *jj) + j; /* column index */
738           for (row=0; row<bs; row++) {
739             tmp[col] += PetscAbsScalar(*v);  v++;
740           }
741         }
742         jj++;
743       }
744       v = bmat->a; jj = bmat->j;
745       for (i=0; i<bmat->nz; i++) {
746         for (j=0; j<bs; j++) {
747           col = bs*garray[*jj] + j;
748           for (row=0; row<bs; row++) {
749             tmp[col] += PetscAbsScalar(*v); v++;
750           }
751         }
752         jj++;
753       }
754       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
755       *nrm = 0.0;
756       for (j=0; j<mat->cmap->N; j++) {
757         if (tmp2[j] > *nrm) *nrm = tmp2[j];
758       }
759       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
760     } else if (type == NORM_INFINITY) { /* max row sum */
761       PetscReal *sums;
762       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
763       sum  = 0.0;
764       for (j=0; j<amat->mbs; j++) {
765         for (row=0; row<bs; row++) sums[row] = 0.0;
766         v  = amat->a + bs2*amat->i[j];
767         nz = amat->i[j+1]-amat->i[j];
768         for (i=0; i<nz; i++) {
769           for (col=0; col<bs; col++) {
770             for (row=0; row<bs; row++) {
771               sums[row] += PetscAbsScalar(*v); v++;
772             }
773           }
774         }
775         v  = bmat->a + bs2*bmat->i[j];
776         nz = bmat->i[j+1]-bmat->i[j];
777         for (i=0; i<nz; i++) {
778           for (col=0; col<bs; col++) {
779             for (row=0; row<bs; row++) {
780               sums[row] += PetscAbsScalar(*v); v++;
781             }
782           }
783         }
784         for (row=0; row<bs; row++) {
785           if (sums[row] > sum) sum = sums[row];
786         }
787       }
788       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
789       ierr = PetscFree(sums);CHKERRQ(ierr);
790     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 /*
796   Creates the hash table, and sets the table
797   This table is created only once.
798   If new entried need to be added to the matrix
799   then the hash table has to be destroyed and
800   recreated.
801 */
802 #undef __FUNCT__
803 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
804 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
805 {
806   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
807   Mat            A     = baij->A,B=baij->B;
808   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
809   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
810   PetscErrorCode ierr;
811   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
812   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
813   PetscInt       *HT,key;
814   MatScalar      **HD;
815   PetscReal      tmp;
816 #if defined(PETSC_USE_INFO)
817   PetscInt ct=0,max=0;
818 #endif
819 
820   PetscFunctionBegin;
821   if (baij->ht) PetscFunctionReturn(0);
822 
823   baij->ht_size = (PetscInt)(factor*nz);
824   ht_size       = baij->ht_size;
825 
826   /* Allocate Memory for Hash Table */
827   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
828   HD   = baij->hd;
829   HT   = baij->ht;
830 
831   /* Loop Over A */
832   for (i=0; i<a->mbs; i++) {
833     for (j=ai[i]; j<ai[i+1]; j++) {
834       row = i+rstart;
835       col = aj[j]+cstart;
836 
837       key = row*Nbs + col + 1;
838       h1  = HASH(ht_size,key,tmp);
839       for (k=0; k<ht_size; k++) {
840         if (!HT[(h1+k)%ht_size]) {
841           HT[(h1+k)%ht_size] = key;
842           HD[(h1+k)%ht_size] = a->a + j*bs2;
843           break;
844 #if defined(PETSC_USE_INFO)
845         } else {
846           ct++;
847 #endif
848         }
849       }
850 #if defined(PETSC_USE_INFO)
851       if (k> max) max = k;
852 #endif
853     }
854   }
855   /* Loop Over B */
856   for (i=0; i<b->mbs; i++) {
857     for (j=bi[i]; j<bi[i+1]; j++) {
858       row = i+rstart;
859       col = garray[bj[j]];
860       key = row*Nbs + col + 1;
861       h1  = HASH(ht_size,key,tmp);
862       for (k=0; k<ht_size; k++) {
863         if (!HT[(h1+k)%ht_size]) {
864           HT[(h1+k)%ht_size] = key;
865           HD[(h1+k)%ht_size] = b->a + j*bs2;
866           break;
867 #if defined(PETSC_USE_INFO)
868         } else {
869           ct++;
870 #endif
871         }
872       }
873 #if defined(PETSC_USE_INFO)
874       if (k> max) max = k;
875 #endif
876     }
877   }
878 
879   /* Print Summary */
880 #if defined(PETSC_USE_INFO)
881   for (i=0,j=0; i<ht_size; i++) {
882     if (HT[i]) j++;
883   }
884   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
885 #endif
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
891 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
892 {
893   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
894   PetscErrorCode ierr;
895   PetscInt       nstash,reallocs;
896 
897   PetscFunctionBegin;
898   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
899 
900   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
901   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
902   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
903   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
904   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
905   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
906   PetscFunctionReturn(0);
907 }
908 
909 #undef __FUNCT__
910 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
911 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
912 {
913   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
914   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
915   PetscErrorCode ierr;
916   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
917   PetscInt       *row,*col;
918   PetscBool      r1,r2,r3,other_disassembled;
919   MatScalar      *val;
920   PetscMPIInt    n;
921 
922   PetscFunctionBegin;
923   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
924   if (!baij->donotstash && !mat->nooffprocentries) {
925     while (1) {
926       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
927       if (!flg) break;
928 
929       for (i=0; i<n;) {
930         /* Now identify the consecutive vals belonging to the same row */
931         for (j=i,rstart=row[j]; j<n; j++) {
932           if (row[j] != rstart) break;
933         }
934         if (j < n) ncols = j-i;
935         else       ncols = n-i;
936         /* Now assemble all these values with a single function call */
937         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
938         i    = j;
939       }
940     }
941     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
942     /* Now process the block-stash. Since the values are stashed column-oriented,
943        set the roworiented flag to column oriented, and after MatSetValues()
944        restore the original flags */
945     r1 = baij->roworiented;
946     r2 = a->roworiented;
947     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
948 
949     baij->roworiented = PETSC_FALSE;
950     a->roworiented    = PETSC_FALSE;
951 
952     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
953     while (1) {
954       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
955       if (!flg) break;
956 
957       for (i=0; i<n;) {
958         /* Now identify the consecutive vals belonging to the same row */
959         for (j=i,rstart=row[j]; j<n; j++) {
960           if (row[j] != rstart) break;
961         }
962         if (j < n) ncols = j-i;
963         else       ncols = n-i;
964         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
965         i    = j;
966       }
967     }
968     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
969 
970     baij->roworiented = r1;
971     a->roworiented    = r2;
972 
973     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
974   }
975 
976   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
978 
979   /* determine if any processor has disassembled, if so we must
980      also disassemble ourselfs, in order that we may reassemble. */
981   /*
982      if nonzero structure of submatrix B cannot change then we know that
983      no processor disassembled thus we can skip this stuff
984   */
985   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
986     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
987     if (mat->was_assembled && !other_disassembled) {
988       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
989     }
990   }
991 
992   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
993     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
994   }
995   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
996   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
997 
998 #if defined(PETSC_USE_INFO)
999   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1000     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1001 
1002     baij->ht_total_ct  = 0;
1003     baij->ht_insert_ct = 0;
1004   }
1005 #endif
1006   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1007     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1008 
1009     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1010     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1011   }
1012 
1013   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1014 
1015   baij->rowvalues = 0;
1016 
1017   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1018   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
1019     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1020     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
1026 #include <petscdraw.h>
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1029 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1030 {
1031   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1032   PetscErrorCode    ierr;
1033   PetscMPIInt       rank = baij->rank;
1034   PetscInt          bs   = mat->rmap->bs;
1035   PetscBool         iascii,isdraw;
1036   PetscViewer       sviewer;
1037   PetscViewerFormat format;
1038 
1039   PetscFunctionBegin;
1040   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1041   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1042   if (iascii) {
1043     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1044     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1045       MatInfo info;
1046       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1047       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1048       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1049       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1050                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1051       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1052       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1053       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1054       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1055       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1056       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1057       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1058       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1059       PetscFunctionReturn(0);
1060     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1061       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1062       PetscFunctionReturn(0);
1063     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1064       PetscFunctionReturn(0);
1065     }
1066   }
1067 
1068   if (isdraw) {
1069     PetscDraw draw;
1070     PetscBool isnull;
1071     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1072     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1073     if (isnull) PetscFunctionReturn(0);
1074   }
1075 
1076   {
1077     /* assemble the entire matrix onto first processor. */
1078     Mat         A;
1079     Mat_SeqBAIJ *Aloc;
1080     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1081     MatScalar   *a;
1082     const char  *matname;
1083 
1084     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1085     /* Perhaps this should be the type of mat? */
1086     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1087     if (!rank) {
1088       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1089     } else {
1090       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1091     }
1092     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1093     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1094     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1095     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1096 
1097     /* copy over the A part */
1098     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1099     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1100     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1101 
1102     for (i=0; i<mbs; i++) {
1103       rvals[0] = bs*(baij->rstartbs + i);
1104       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1105       for (j=ai[i]; j<ai[i+1]; j++) {
1106         col = (baij->cstartbs+aj[j])*bs;
1107         for (k=0; k<bs; k++) {
1108           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1109           col++; a += bs;
1110         }
1111       }
1112     }
1113     /* copy over the B part */
1114     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1115     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1116     for (i=0; i<mbs; i++) {
1117       rvals[0] = bs*(baij->rstartbs + i);
1118       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1119       for (j=ai[i]; j<ai[i+1]; j++) {
1120         col = baij->garray[aj[j]]*bs;
1121         for (k=0; k<bs; k++) {
1122           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1123           col++; a += bs;
1124         }
1125       }
1126     }
1127     ierr = PetscFree(rvals);CHKERRQ(ierr);
1128     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1129     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1130     /*
1131        Everyone has to call to draw the matrix since the graphics waits are
1132        synchronized across all processors that share the PetscDraw object
1133     */
1134     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1135     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1136     if (!rank) {
1137       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1138       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1139     }
1140     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1141     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1142     ierr = MatDestroy(&A);CHKERRQ(ierr);
1143   }
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1149 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1150 {
1151   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1152   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1153   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1154   PetscErrorCode ierr;
1155   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1156   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1157   int            fd;
1158   PetscScalar    *column_values;
1159   FILE           *file;
1160   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1161   PetscInt       message_count,flowcontrolcount;
1162 
1163   PetscFunctionBegin;
1164   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1166   nz   = bs2*(A->nz + B->nz);
1167   rlen = mat->rmap->n;
1168   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1169   if (!rank) {
1170     header[0] = MAT_FILE_CLASSID;
1171     header[1] = mat->rmap->N;
1172     header[2] = mat->cmap->N;
1173 
1174     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     /* get largest number of rows any processor has */
1177     range = mat->rmap->range;
1178     for (i=1; i<size; i++) {
1179       rlen = PetscMax(rlen,range[i+1] - range[i]);
1180     }
1181   } else {
1182     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183   }
1184 
1185   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1186   /* compute lengths of each row  */
1187   for (i=0; i<a->mbs; i++) {
1188     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1189   }
1190   /* store the row lengths to the file */
1191   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1192   if (!rank) {
1193     MPI_Status status;
1194     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1195     rlen = (range[1] - range[0])/bs;
1196     for (i=0; i<rlen; i++) {
1197       for (j=0; j<bs; j++) {
1198         row_lens[i*bs+j] = bs*crow_lens[i];
1199       }
1200     }
1201     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1202     for (i=1; i<size; i++) {
1203       rlen = (range[i+1] - range[i])/bs;
1204       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1205       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1206       for (k=0; k<rlen; k++) {
1207         for (j=0; j<bs; j++) {
1208           row_lens[k*bs+j] = bs*crow_lens[k];
1209         }
1210       }
1211       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1212     }
1213     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1214     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1215   } else {
1216     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1217     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1219   }
1220   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1221 
1222   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1223      information needed to make it for each row from a block row. This does require more communication but still not more than
1224      the communication needed for the nonzero values  */
1225   nzmax = nz; /*  space a largest processor needs */
1226   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1228   cnt   = 0;
1229   for (i=0; i<a->mbs; i++) {
1230     pcnt = cnt;
1231     for (j=B->i[i]; j<B->i[i+1]; j++) {
1232       if ((col = garray[B->j[j]]) > cstart) break;
1233       for (l=0; l<bs; l++) {
1234         column_indices[cnt++] = bs*col+l;
1235       }
1236     }
1237     for (k=A->i[i]; k<A->i[i+1]; k++) {
1238       for (l=0; l<bs; l++) {
1239         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1240       }
1241     }
1242     for (; j<B->i[i+1]; j++) {
1243       for (l=0; l<bs; l++) {
1244         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1245       }
1246     }
1247     len = cnt - pcnt;
1248     for (k=1; k<bs; k++) {
1249       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1250       cnt += len;
1251     }
1252   }
1253   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1254 
1255   /* store the columns to the file */
1256   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1257   if (!rank) {
1258     MPI_Status status;
1259     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1260     for (i=1; i<size; i++) {
1261       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1262       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1263       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1264       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1267   } else {
1268     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1269     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1272   }
1273   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1274 
1275   /* load up the numerical values */
1276   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1277   cnt  = 0;
1278   for (i=0; i<a->mbs; i++) {
1279     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1280     for (j=B->i[i]; j<B->i[i+1]; j++) {
1281       if (garray[B->j[j]] > cstart) break;
1282       for (l=0; l<bs; l++) {
1283         for (ll=0; ll<bs; ll++) {
1284           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1285         }
1286       }
1287       cnt += bs;
1288     }
1289     for (k=A->i[i]; k<A->i[i+1]; k++) {
1290       for (l=0; l<bs; l++) {
1291         for (ll=0; ll<bs; ll++) {
1292           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1293         }
1294       }
1295       cnt += bs;
1296     }
1297     for (; j<B->i[i+1]; j++) {
1298       for (l=0; l<bs; l++) {
1299         for (ll=0; ll<bs; ll++) {
1300           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1301         }
1302       }
1303       cnt += bs;
1304     }
1305     cnt += (bs-1)*rlen;
1306   }
1307   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1308 
1309   /* store the column values to the file */
1310   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1311   if (!rank) {
1312     MPI_Status status;
1313     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1314     for (i=1; i<size; i++) {
1315       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1316       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1317       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1319     }
1320     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1321   } else {
1322     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1323     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1325     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1326   }
1327   ierr = PetscFree(column_values);CHKERRQ(ierr);
1328 
1329   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1330   if (file) {
1331     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1332   }
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatView_MPIBAIJ"
1338 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1339 {
1340   PetscErrorCode ierr;
1341   PetscBool      iascii,isdraw,issocket,isbinary;
1342 
1343   PetscFunctionBegin;
1344   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1345   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1346   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1347   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1348   if (iascii || isdraw || issocket) {
1349     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1350   } else if (isbinary) {
1351     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1358 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1359 {
1360   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364 #if defined(PETSC_USE_LOG)
1365   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1366 #endif
1367   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1368   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1369   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1370   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1371 #if defined(PETSC_USE_CTABLE)
1372   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1373 #else
1374   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1375 #endif
1376   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1377   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1378   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1379   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1380   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1381   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1382   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1383   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1384 
1385   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1390   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1394 #if defined(PETSC_HAVE_HYPRE)
1395   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1396 #endif
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatMult_MPIBAIJ"
1402 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406   PetscInt       nt;
1407 
1408   PetscFunctionBegin;
1409   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1410   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1411   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1412   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1413   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1414   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1415   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1416   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1422 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1429   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1430   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1431   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1437 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1438 {
1439   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441   PetscBool      merged;
1442 
1443   PetscFunctionBegin;
1444   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1445   /* do nondiagonal part */
1446   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1447   if (!merged) {
1448     /* send it on its way */
1449     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1450     /* do local part */
1451     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1452     /* receive remote parts: note this assumes the values are not actually */
1453     /* inserted in yy until the next line */
1454     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1455   } else {
1456     /* do local part */
1457     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1458     /* send it on its way */
1459     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1460     /* values actually were received in the Begin() but we need to call this nop */
1461     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1468 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1469 {
1470   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1471   PetscErrorCode ierr;
1472 
1473   PetscFunctionBegin;
1474   /* do nondiagonal part */
1475   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1476   /* send it on its way */
1477   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1478   /* do local part */
1479   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1480   /* receive remote parts: note this assumes the values are not actually */
1481   /* inserted in yy until the next line, which is true for my implementation*/
1482   /* but is not perhaps always true. */
1483   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 /*
1488   This only works correctly for square matrices where the subblock A->A is the
1489    diagonal block
1490 */
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1493 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1494 {
1495   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1496   PetscErrorCode ierr;
1497 
1498   PetscFunctionBegin;
1499   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1500   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatScale_MPIBAIJ"
1506 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1507 {
1508   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1513   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1519 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1520 {
1521   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1522   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1523   PetscErrorCode ierr;
1524   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1525   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1526   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1527 
1528   PetscFunctionBegin;
1529   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1530   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1531   mat->getrowactive = PETSC_TRUE;
1532 
1533   if (!mat->rowvalues && (idx || v)) {
1534     /*
1535         allocate enough space to hold information from the longest row.
1536     */
1537     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1538     PetscInt    max = 1,mbs = mat->mbs,tmp;
1539     for (i=0; i<mbs; i++) {
1540       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1541       if (max < tmp) max = tmp;
1542     }
1543     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1544   }
1545   lrow = row - brstart;
1546 
1547   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1548   if (!v)   {pvA = 0; pvB = 0;}
1549   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1550   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1551   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1552   nztot = nzA + nzB;
1553 
1554   cmap = mat->garray;
1555   if (v  || idx) {
1556     if (nztot) {
1557       /* Sort by increasing column numbers, assuming A and B already sorted */
1558       PetscInt imark = -1;
1559       if (v) {
1560         *v = v_p = mat->rowvalues;
1561         for (i=0; i<nzB; i++) {
1562           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1563           else break;
1564         }
1565         imark = i;
1566         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1567         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1568       }
1569       if (idx) {
1570         *idx = idx_p = mat->rowindices;
1571         if (imark > -1) {
1572           for (i=0; i<imark; i++) {
1573             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1574           }
1575         } else {
1576           for (i=0; i<nzB; i++) {
1577             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1578             else break;
1579           }
1580           imark = i;
1581         }
1582         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1583         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1584       }
1585     } else {
1586       if (idx) *idx = 0;
1587       if (v)   *v   = 0;
1588     }
1589   }
1590   *nz  = nztot;
1591   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1592   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1598 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1599 {
1600   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1601 
1602   PetscFunctionBegin;
1603   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1604   baij->getrowactive = PETSC_FALSE;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1610 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1611 {
1612   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614 
1615   PetscFunctionBegin;
1616   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1617   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1623 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1624 {
1625   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1626   Mat            A  = a->A,B = a->B;
1627   PetscErrorCode ierr;
1628   PetscReal      isend[5],irecv[5];
1629 
1630   PetscFunctionBegin;
1631   info->block_size = (PetscReal)matin->rmap->bs;
1632 
1633   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1634 
1635   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1636   isend[3] = info->memory;  isend[4] = info->mallocs;
1637 
1638   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1639 
1640   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1641   isend[3] += info->memory;  isend[4] += info->mallocs;
1642 
1643   if (flag == MAT_LOCAL) {
1644     info->nz_used      = isend[0];
1645     info->nz_allocated = isend[1];
1646     info->nz_unneeded  = isend[2];
1647     info->memory       = isend[3];
1648     info->mallocs      = isend[4];
1649   } else if (flag == MAT_GLOBAL_MAX) {
1650     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1651 
1652     info->nz_used      = irecv[0];
1653     info->nz_allocated = irecv[1];
1654     info->nz_unneeded  = irecv[2];
1655     info->memory       = irecv[3];
1656     info->mallocs      = irecv[4];
1657   } else if (flag == MAT_GLOBAL_SUM) {
1658     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1659 
1660     info->nz_used      = irecv[0];
1661     info->nz_allocated = irecv[1];
1662     info->nz_unneeded  = irecv[2];
1663     info->memory       = irecv[3];
1664     info->mallocs      = irecv[4];
1665   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1666   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1667   info->fill_ratio_needed = 0;
1668   info->factor_mallocs    = 0;
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1674 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1675 {
1676   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   switch (op) {
1681   case MAT_NEW_NONZERO_LOCATIONS:
1682   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1683   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1684   case MAT_KEEP_NONZERO_PATTERN:
1685   case MAT_NEW_NONZERO_LOCATION_ERR:
1686     MatCheckPreallocated(A,1);
1687     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1688     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1689     break;
1690   case MAT_ROW_ORIENTED:
1691     MatCheckPreallocated(A,1);
1692     a->roworiented = flg;
1693 
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1696     break;
1697   case MAT_NEW_DIAGONALS:
1698     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1699     break;
1700   case MAT_IGNORE_OFF_PROC_ENTRIES:
1701     a->donotstash = flg;
1702     break;
1703   case MAT_USE_HASH_TABLE:
1704     a->ht_flag = flg;
1705     a->ht_fact = 1.39;
1706     break;
1707   case MAT_SYMMETRIC:
1708   case MAT_STRUCTURALLY_SYMMETRIC:
1709   case MAT_HERMITIAN:
1710   case MAT_SYMMETRY_ETERNAL:
1711     MatCheckPreallocated(A,1);
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     break;
1714   default:
1715     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1722 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1723 {
1724   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1725   Mat_SeqBAIJ    *Aloc;
1726   Mat            B;
1727   PetscErrorCode ierr;
1728   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1729   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1730   MatScalar      *a;
1731 
1732   PetscFunctionBegin;
1733   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1734   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1735     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1736     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1737     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1738     /* Do not know preallocation information, but must set block size */
1739     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1740   } else {
1741     B = *matout;
1742   }
1743 
1744   /* copy over the A part */
1745   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1746   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1747   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1748 
1749   for (i=0; i<mbs; i++) {
1750     rvals[0] = bs*(baij->rstartbs + i);
1751     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1752     for (j=ai[i]; j<ai[i+1]; j++) {
1753       col = (baij->cstartbs+aj[j])*bs;
1754       for (k=0; k<bs; k++) {
1755         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1756 
1757         col++; a += bs;
1758       }
1759     }
1760   }
1761   /* copy over the B part */
1762   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1763   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1764   for (i=0; i<mbs; i++) {
1765     rvals[0] = bs*(baij->rstartbs + i);
1766     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1767     for (j=ai[i]; j<ai[i+1]; j++) {
1768       col = baij->garray[aj[j]]*bs;
1769       for (k=0; k<bs; k++) {
1770         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1771         col++;
1772         a += bs;
1773       }
1774     }
1775   }
1776   ierr = PetscFree(rvals);CHKERRQ(ierr);
1777   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1778   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1779 
1780   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1781   else {
1782     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1789 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1790 {
1791   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1792   Mat            a     = baij->A,b = baij->B;
1793   PetscErrorCode ierr;
1794   PetscInt       s1,s2,s3;
1795 
1796   PetscFunctionBegin;
1797   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1798   if (rr) {
1799     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1800     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1801     /* Overlap communication with computation. */
1802     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1803   }
1804   if (ll) {
1805     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1806     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1807     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1808   }
1809   /* scale  the diagonal block */
1810   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1811 
1812   if (rr) {
1813     /* Do a scatter end and then right scale the off-diagonal block */
1814     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1815     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1816   }
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1822 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1823 {
1824   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1825   PetscInt      *lrows;
1826   PetscInt       r, len;
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   /* get locally owned rows */
1831   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1832   /* fix right hand side if needed */
1833   if (x && b) {
1834     const PetscScalar *xx;
1835     PetscScalar       *bb;
1836 
1837     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1838     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1839     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1840     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1841     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1842   }
1843 
1844   /* actually zap the local rows */
1845   /*
1846         Zero the required rows. If the "diagonal block" of the matrix
1847      is square and the user wishes to set the diagonal we use separate
1848      code so that MatSetValues() is not called for each diagonal allocating
1849      new memory, thus calling lots of mallocs and slowing things down.
1850 
1851   */
1852   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1853   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1854   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
1855     PetscBool cong;
1856     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
1857     if (cong) A->congruentlayouts = 1;
1858     else      A->congruentlayouts = 0;
1859   }
1860   if ((diag != 0.0) && A->congruentlayouts) {
1861     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1862   } else if (diag != 0.0) {
1863     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1864     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1865        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1866     for (r = 0; r < len; ++r) {
1867       const PetscInt row = lrows[r] + A->rmap->rstart;
1868       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1869     }
1870     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872   } else {
1873     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1874   }
1875   ierr = PetscFree(lrows);CHKERRQ(ierr);
1876 
1877   /* only change matrix nonzero state if pattern was allowed to be changed */
1878   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1879     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1880     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1881   }
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1887 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1888 {
1889   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1890   PetscErrorCode    ierr;
1891   PetscMPIInt       n = A->rmap->n;
1892   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1893   PetscInt          *lrows,*owners = A->rmap->range;
1894   PetscSFNode       *rrows;
1895   PetscSF           sf;
1896   const PetscScalar *xx;
1897   PetscScalar       *bb,*mask;
1898   Vec               xmask,lmask;
1899   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1900   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1901   PetscScalar       *aa;
1902 
1903   PetscFunctionBegin;
1904   /* Create SF where leaves are input rows and roots are owned rows */
1905   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1906   for (r = 0; r < n; ++r) lrows[r] = -1;
1907   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1908   for (r = 0; r < N; ++r) {
1909     const PetscInt idx   = rows[r];
1910     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1911     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1912       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1913     }
1914     rrows[r].rank  = p;
1915     rrows[r].index = rows[r] - owners[p];
1916   }
1917   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1918   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1919   /* Collect flags for rows to be zeroed */
1920   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1921   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1922   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1923   /* Compress and put in row numbers */
1924   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1925   /* zero diagonal part of matrix */
1926   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1927   /* handle off diagonal part of matrix */
1928   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1929   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1930   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1931   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1932   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1933   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1934   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1935   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1936   if (x) {
1937     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1938     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1939     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1940     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1941   }
1942   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1943   /* remove zeroed rows of off diagonal matrix */
1944   for (i = 0; i < len; ++i) {
1945     row   = lrows[i];
1946     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1947     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1948     for (k = 0; k < count; ++k) {
1949       aa[0] = 0.0;
1950       aa   += bs;
1951     }
1952   }
1953   /* loop over all elements of off process part of matrix zeroing removed columns*/
1954   for (i = 0; i < l->B->rmap->N; ++i) {
1955     row = i/bs;
1956     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1957       for (k = 0; k < bs; ++k) {
1958         col = bs*baij->j[j] + k;
1959         if (PetscAbsScalar(mask[col])) {
1960           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1961           if (x) bb[i] -= aa[0]*xx[col];
1962           aa[0] = 0.0;
1963         }
1964       }
1965     }
1966   }
1967   if (x) {
1968     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1969     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1970   }
1971   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1972   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1973   ierr = PetscFree(lrows);CHKERRQ(ierr);
1974 
1975   /* only change matrix nonzero state if pattern was allowed to be changed */
1976   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1977     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1978     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1979   }
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1985 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1986 {
1987   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1988   PetscErrorCode ierr;
1989 
1990   PetscFunctionBegin;
1991   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatEqual_MPIBAIJ"
1999 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
2000 {
2001   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
2002   Mat            a,b,c,d;
2003   PetscBool      flg;
2004   PetscErrorCode ierr;
2005 
2006   PetscFunctionBegin;
2007   a = matA->A; b = matA->B;
2008   c = matB->A; d = matB->B;
2009 
2010   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2011   if (flg) {
2012     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2013   }
2014   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatCopy_MPIBAIJ"
2020 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
2021 {
2022   PetscErrorCode ierr;
2023   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2024   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2025 
2026   PetscFunctionBegin;
2027   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2028   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2029     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2030   } else {
2031     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2032     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatSetUp_MPIBAIJ"
2039 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
2040 {
2041   PetscErrorCode ierr;
2042 
2043   PetscFunctionBegin;
2044   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
2050 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2051 {
2052   PetscErrorCode ierr;
2053   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
2054   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2055   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2056 
2057   PetscFunctionBegin;
2058   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatAXPY_MPIBAIJ"
2064 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2065 {
2066   PetscErrorCode ierr;
2067   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
2068   PetscBLASInt   bnz,one=1;
2069   Mat_SeqBAIJ    *x,*y;
2070 
2071   PetscFunctionBegin;
2072   if (str == SAME_NONZERO_PATTERN) {
2073     PetscScalar alpha = a;
2074     x    = (Mat_SeqBAIJ*)xx->A->data;
2075     y    = (Mat_SeqBAIJ*)yy->A->data;
2076     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2077     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2078     x    = (Mat_SeqBAIJ*)xx->B->data;
2079     y    = (Mat_SeqBAIJ*)yy->B->data;
2080     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2081     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2082     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2083   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2084     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2085   } else {
2086     Mat      B;
2087     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2088     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2089     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2090     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2091     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2092     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2093     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2094     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2095     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2096     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2097     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2098     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2099     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2100     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2101     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2102     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2103   }
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2109 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2110 {
2111   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2116   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2122 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2123 {
2124   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2129   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2135 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2136 {
2137   PetscErrorCode ierr;
2138   IS             iscol_local;
2139   PetscInt       csize;
2140 
2141   PetscFunctionBegin;
2142   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2143   if (call == MAT_REUSE_MATRIX) {
2144     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2145     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2146   } else {
2147     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2148   }
2149   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2150   if (call == MAT_INITIAL_MATRIX) {
2151     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2152     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2153   }
2154   PetscFunctionReturn(0);
2155 }
2156 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2159 /*
2160   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2161   in local and then by concatenating the local matrices the end result.
2162   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2163   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2164 */
2165 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2166 {
2167   PetscErrorCode ierr;
2168   PetscMPIInt    rank,size;
2169   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2170   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2171   Mat            M,Mreuse;
2172   MatScalar      *vwork,*aa;
2173   MPI_Comm       comm;
2174   IS             isrow_new, iscol_new;
2175   PetscBool      idflag,allrows, allcols;
2176   Mat_SeqBAIJ    *aij;
2177 
2178   PetscFunctionBegin;
2179   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2180   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2182   /* The compression and expansion should be avoided. Doesn't point
2183      out errors, might change the indices, hence buggey */
2184   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2185   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2186 
2187   /* Check for special case: each processor gets entire matrix columns */
2188   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2189   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2190   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2191   else allcols = PETSC_FALSE;
2192 
2193   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2194   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2195   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2196   else allrows = PETSC_FALSE;
2197 
2198   if (call ==  MAT_REUSE_MATRIX) {
2199     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2200     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2201     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2202   } else {
2203     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2204   }
2205   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2206   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2207   /*
2208       m - number of local rows
2209       n - number of columns (same on all processors)
2210       rstart - first row in new global matrix generated
2211   */
2212   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2213   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2214   m    = m/bs;
2215   n    = n/bs;
2216 
2217   if (call == MAT_INITIAL_MATRIX) {
2218     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2219     ii  = aij->i;
2220     jj  = aij->j;
2221 
2222     /*
2223         Determine the number of non-zeros in the diagonal and off-diagonal
2224         portions of the matrix in order to do correct preallocation
2225     */
2226 
2227     /* first get start and end of "diagonal" columns */
2228     if (csize == PETSC_DECIDE) {
2229       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2230       if (mglobal == n*bs) { /* square matrix */
2231         nlocal = m;
2232       } else {
2233         nlocal = n/size + ((n % size) > rank);
2234       }
2235     } else {
2236       nlocal = csize/bs;
2237     }
2238     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2239     rstart = rend - nlocal;
2240     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2241 
2242     /* next, compute all the lengths */
2243     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2244     for (i=0; i<m; i++) {
2245       jend = ii[i+1] - ii[i];
2246       olen = 0;
2247       dlen = 0;
2248       for (j=0; j<jend; j++) {
2249         if (*jj < rstart || *jj >= rend) olen++;
2250         else dlen++;
2251         jj++;
2252       }
2253       olens[i] = olen;
2254       dlens[i] = dlen;
2255     }
2256     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2257     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2258     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2259     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2260     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2261     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2262   } else {
2263     PetscInt ml,nl;
2264 
2265     M    = *newmat;
2266     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2267     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2268     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2269     /*
2270          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2271        rather than the slower MatSetValues().
2272     */
2273     M->was_assembled = PETSC_TRUE;
2274     M->assembled     = PETSC_FALSE;
2275   }
2276   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2277   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2278   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2279   ii   = aij->i;
2280   jj   = aij->j;
2281   aa   = aij->a;
2282   for (i=0; i<m; i++) {
2283     row   = rstart/bs + i;
2284     nz    = ii[i+1] - ii[i];
2285     cwork = jj;     jj += nz;
2286     vwork = aa;     aa += nz*bs*bs;
2287     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2288   }
2289 
2290   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2291   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2292   *newmat = M;
2293 
2294   /* save submatrix used in processor for next request */
2295   if (call ==  MAT_INITIAL_MATRIX) {
2296     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2297     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2298   }
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatPermute_MPIBAIJ"
2304 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2305 {
2306   MPI_Comm       comm,pcomm;
2307   PetscInt       clocal_size,nrows;
2308   const PetscInt *rows;
2309   PetscMPIInt    size;
2310   IS             crowp,lcolp;
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2315   /* make a collective version of 'rowp' */
2316   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2317   if (pcomm==comm) {
2318     crowp = rowp;
2319   } else {
2320     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2321     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2322     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2323     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2324   }
2325   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2326   /* make a local version of 'colp' */
2327   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2328   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2329   if (size==1) {
2330     lcolp = colp;
2331   } else {
2332     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2333   }
2334   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2335   /* now we just get the submatrix */
2336   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2337   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2338   /* clean up */
2339   if (pcomm!=comm) {
2340     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2341   }
2342   if (size>1) {
2343     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2344   }
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2350 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2351 {
2352   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2353   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2354 
2355   PetscFunctionBegin;
2356   if (nghosts) *nghosts = B->nbs;
2357   if (ghosts) *ghosts = baij->garray;
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2363 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2364 {
2365   Mat            B;
2366   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2367   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2368   Mat_SeqAIJ     *b;
2369   PetscErrorCode ierr;
2370   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2371   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2372   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2373 
2374   PetscFunctionBegin;
2375   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2376   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2377 
2378   /* ----------------------------------------------------------------
2379      Tell every processor the number of nonzeros per row
2380   */
2381   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2382   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2383     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2384   }
2385   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2386   displs    = recvcounts + size;
2387   for (i=0; i<size; i++) {
2388     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2389     displs[i]     = A->rmap->range[i]/bs;
2390   }
2391 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2392   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2393 #else
2394   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2395   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2396 #endif
2397   /* ---------------------------------------------------------------
2398      Create the sequential matrix of the same type as the local block diagonal
2399   */
2400   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2401   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2402   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2403   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2404   b    = (Mat_SeqAIJ*)B->data;
2405 
2406   /*--------------------------------------------------------------------
2407     Copy my part of matrix column indices over
2408   */
2409   sendcount  = ad->nz + bd->nz;
2410   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2411   a_jsendbuf = ad->j;
2412   b_jsendbuf = bd->j;
2413   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2414   cnt        = 0;
2415   for (i=0; i<n; i++) {
2416 
2417     /* put in lower diagonal portion */
2418     m = bd->i[i+1] - bd->i[i];
2419     while (m > 0) {
2420       /* is it above diagonal (in bd (compressed) numbering) */
2421       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2422       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2423       m--;
2424     }
2425 
2426     /* put in diagonal portion */
2427     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2428       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2429     }
2430 
2431     /* put in upper diagonal portion */
2432     while (m-- > 0) {
2433       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2434     }
2435   }
2436   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2437 
2438   /*--------------------------------------------------------------------
2439     Gather all column indices to all processors
2440   */
2441   for (i=0; i<size; i++) {
2442     recvcounts[i] = 0;
2443     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2444       recvcounts[i] += lens[j];
2445     }
2446   }
2447   displs[0] = 0;
2448   for (i=1; i<size; i++) {
2449     displs[i] = displs[i-1] + recvcounts[i-1];
2450   }
2451 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2452   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2453 #else
2454   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2455 #endif
2456   /*--------------------------------------------------------------------
2457     Assemble the matrix into useable form (note numerical values not yet set)
2458   */
2459   /* set the b->ilen (length of each row) values */
2460   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2461   /* set the b->i indices */
2462   b->i[0] = 0;
2463   for (i=1; i<=A->rmap->N/bs; i++) {
2464     b->i[i] = b->i[i-1] + lens[i-1];
2465   }
2466   ierr = PetscFree(lens);CHKERRQ(ierr);
2467   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2468   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2469   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2470 
2471   if (A->symmetric) {
2472     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2473   } else if (A->hermitian) {
2474     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2475   } else if (A->structurally_symmetric) {
2476     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2477   }
2478   *newmat = B;
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatSOR_MPIBAIJ"
2484 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2485 {
2486   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2487   PetscErrorCode ierr;
2488   Vec            bb1 = 0;
2489 
2490   PetscFunctionBegin;
2491   if (flag == SOR_APPLY_UPPER) {
2492     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2493     PetscFunctionReturn(0);
2494   }
2495 
2496   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2497     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2498   }
2499 
2500   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2501     if (flag & SOR_ZERO_INITIAL_GUESS) {
2502       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2503       its--;
2504     }
2505 
2506     while (its--) {
2507       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2508       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2509 
2510       /* update rhs: bb1 = bb - B*x */
2511       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2512       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2513 
2514       /* local sweep */
2515       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2516     }
2517   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2518     if (flag & SOR_ZERO_INITIAL_GUESS) {
2519       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2520       its--;
2521     }
2522     while (its--) {
2523       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2524       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2525 
2526       /* update rhs: bb1 = bb - B*x */
2527       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2528       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2529 
2530       /* local sweep */
2531       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2532     }
2533   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2534     if (flag & SOR_ZERO_INITIAL_GUESS) {
2535       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2536       its--;
2537     }
2538     while (its--) {
2539       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2540       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2541 
2542       /* update rhs: bb1 = bb - B*x */
2543       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2544       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2545 
2546       /* local sweep */
2547       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2548     }
2549   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2550 
2551   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2557 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2558 {
2559   PetscErrorCode ierr;
2560   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2561   PetscInt       N,i,*garray = aij->garray;
2562   PetscInt       ib,jb,bs = A->rmap->bs;
2563   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2564   MatScalar      *a_val = a_aij->a;
2565   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2566   MatScalar      *b_val = b_aij->a;
2567   PetscReal      *work;
2568 
2569   PetscFunctionBegin;
2570   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2571   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2572   if (type == NORM_2) {
2573     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2574       for (jb=0; jb<bs; jb++) {
2575         for (ib=0; ib<bs; ib++) {
2576           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2577           a_val++;
2578         }
2579       }
2580     }
2581     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2582       for (jb=0; jb<bs; jb++) {
2583         for (ib=0; ib<bs; ib++) {
2584           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2585           b_val++;
2586         }
2587       }
2588     }
2589   } else if (type == NORM_1) {
2590     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2591       for (jb=0; jb<bs; jb++) {
2592         for (ib=0; ib<bs; ib++) {
2593           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2594           a_val++;
2595         }
2596       }
2597     }
2598     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2599       for (jb=0; jb<bs; jb++) {
2600        for (ib=0; ib<bs; ib++) {
2601           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2602           b_val++;
2603         }
2604       }
2605     }
2606   } else if (type == NORM_INFINITY) {
2607     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2608       for (jb=0; jb<bs; jb++) {
2609         for (ib=0; ib<bs; ib++) {
2610           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2611           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2612           a_val++;
2613         }
2614       }
2615     }
2616     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2617       for (jb=0; jb<bs; jb++) {
2618         for (ib=0; ib<bs; ib++) {
2619           int col = garray[b_aij->j[i]] * bs + jb;
2620           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2621           b_val++;
2622         }
2623       }
2624     }
2625   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2626   if (type == NORM_INFINITY) {
2627     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2628   } else {
2629     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2630   }
2631   ierr = PetscFree(work);CHKERRQ(ierr);
2632   if (type == NORM_2) {
2633     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2634   }
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2640 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2641 {
2642   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2643   PetscErrorCode ierr;
2644 
2645   PetscFunctionBegin;
2646   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2647   A->factorerrortype             = a->A->factorerrortype;
2648   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2649   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatShift_MPIBAIJ"
2655 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2656 {
2657   PetscErrorCode ierr;
2658   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2659   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2660 
2661   PetscFunctionBegin;
2662   if (!Y->preallocated) {
2663     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2664   } else if (!aij->nz) {
2665     PetscInt nonew = aij->nonew;
2666     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2667     aij->nonew = nonew;
2668   }
2669   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatMissingDiagonal_MPIBAIJ"
2675 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2676 {
2677   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2678   PetscErrorCode ierr;
2679 
2680   PetscFunctionBegin;
2681   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2682   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2683   if (d) {
2684     PetscInt rstart;
2685     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2686     *d += rstart/A->rmap->bs;
2687 
2688   }
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2694 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2695 {
2696   PetscFunctionBegin;
2697   *a = ((Mat_MPIBAIJ*)A->data)->A;
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 /* -------------------------------------------------------------------*/
2702 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2703                                        MatGetRow_MPIBAIJ,
2704                                        MatRestoreRow_MPIBAIJ,
2705                                        MatMult_MPIBAIJ,
2706                                 /* 4*/ MatMultAdd_MPIBAIJ,
2707                                        MatMultTranspose_MPIBAIJ,
2708                                        MatMultTransposeAdd_MPIBAIJ,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                 /*10*/ 0,
2713                                        0,
2714                                        0,
2715                                        MatSOR_MPIBAIJ,
2716                                        MatTranspose_MPIBAIJ,
2717                                 /*15*/ MatGetInfo_MPIBAIJ,
2718                                        MatEqual_MPIBAIJ,
2719                                        MatGetDiagonal_MPIBAIJ,
2720                                        MatDiagonalScale_MPIBAIJ,
2721                                        MatNorm_MPIBAIJ,
2722                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2723                                        MatAssemblyEnd_MPIBAIJ,
2724                                        MatSetOption_MPIBAIJ,
2725                                        MatZeroEntries_MPIBAIJ,
2726                                 /*24*/ MatZeroRows_MPIBAIJ,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                 /*29*/ MatSetUp_MPIBAIJ,
2732                                        0,
2733                                        0,
2734                                        MatGetDiagonalBlock_MPIBAIJ,
2735                                        0,
2736                                 /*34*/ MatDuplicate_MPIBAIJ,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                 /*39*/ MatAXPY_MPIBAIJ,
2742                                        MatGetSubMatrices_MPIBAIJ,
2743                                        MatIncreaseOverlap_MPIBAIJ,
2744                                        MatGetValues_MPIBAIJ,
2745                                        MatCopy_MPIBAIJ,
2746                                 /*44*/ 0,
2747                                        MatScale_MPIBAIJ,
2748                                        MatShift_MPIBAIJ,
2749                                        0,
2750                                        MatZeroRowsColumns_MPIBAIJ,
2751                                 /*49*/ 0,
2752                                        0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2757                                        0,
2758                                        MatSetUnfactored_MPIBAIJ,
2759                                        MatPermute_MPIBAIJ,
2760                                        MatSetValuesBlocked_MPIBAIJ,
2761                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2762                                        MatDestroy_MPIBAIJ,
2763                                        MatView_MPIBAIJ,
2764                                        0,
2765                                        0,
2766                                 /*64*/ 0,
2767                                        0,
2768                                        0,
2769                                        0,
2770                                        0,
2771                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2772                                        0,
2773                                        0,
2774                                        0,
2775                                        0,
2776                                 /*74*/ 0,
2777                                        MatFDColoringApply_BAIJ,
2778                                        0,
2779                                        0,
2780                                        0,
2781                                 /*79*/ 0,
2782                                        0,
2783                                        0,
2784                                        0,
2785                                        MatLoad_MPIBAIJ,
2786                                 /*84*/ 0,
2787                                        0,
2788                                        0,
2789                                        0,
2790                                        0,
2791                                 /*89*/ 0,
2792                                        0,
2793                                        0,
2794                                        0,
2795                                        0,
2796                                 /*94*/ 0,
2797                                        0,
2798                                        0,
2799                                        0,
2800                                        0,
2801                                 /*99*/ 0,
2802                                        0,
2803                                        0,
2804                                        0,
2805                                        0,
2806                                 /*104*/0,
2807                                        MatRealPart_MPIBAIJ,
2808                                        MatImaginaryPart_MPIBAIJ,
2809                                        0,
2810                                        0,
2811                                 /*109*/0,
2812                                        0,
2813                                        0,
2814                                        0,
2815                                        MatMissingDiagonal_MPIBAIJ,
2816                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2817                                        0,
2818                                        MatGetGhosts_MPIBAIJ,
2819                                        0,
2820                                        0,
2821                                 /*119*/0,
2822                                        0,
2823                                        0,
2824                                        0,
2825                                        MatGetMultiProcBlock_MPIBAIJ,
2826                                 /*124*/0,
2827                                        MatGetColumnNorms_MPIBAIJ,
2828                                        MatInvertBlockDiagonal_MPIBAIJ,
2829                                        0,
2830                                        0,
2831                                /*129*/ 0,
2832                                        0,
2833                                        0,
2834                                        0,
2835                                        0,
2836                                /*134*/ 0,
2837                                        0,
2838                                        0,
2839                                        0,
2840                                        0,
2841                                /*139*/ 0,
2842                                        0,
2843                                        0,
2844                                        MatFDColoringSetUp_MPIXAIJ,
2845                                        0,
2846                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2847 };
2848 
2849 
2850 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2854 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2855 {
2856   PetscInt       m,rstart,cstart,cend;
2857   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2858   const PetscInt *JJ    =0;
2859   PetscScalar    *values=0;
2860   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2861   PetscErrorCode ierr;
2862 
2863   PetscFunctionBegin;
2864   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2865   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2866   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2867   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2868   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2869   m      = B->rmap->n/bs;
2870   rstart = B->rmap->rstart/bs;
2871   cstart = B->cmap->rstart/bs;
2872   cend   = B->cmap->rend/bs;
2873 
2874   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2875   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2876   for (i=0; i<m; i++) {
2877     nz = ii[i+1] - ii[i];
2878     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2879     nz_max = PetscMax(nz_max,nz);
2880     JJ     = jj + ii[i];
2881     for (j=0; j<nz; j++) {
2882       if (*JJ >= cstart) break;
2883       JJ++;
2884     }
2885     d = 0;
2886     for (; j<nz; j++) {
2887       if (*JJ++ >= cend) break;
2888       d++;
2889     }
2890     d_nnz[i] = d;
2891     o_nnz[i] = nz - d;
2892   }
2893   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2894   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2895 
2896   values = (PetscScalar*)V;
2897   if (!values) {
2898     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2899     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2900   }
2901   for (i=0; i<m; i++) {
2902     PetscInt          row    = i + rstart;
2903     PetscInt          ncols  = ii[i+1] - ii[i];
2904     const PetscInt    *icols = jj + ii[i];
2905     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2906       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2907       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2908     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2909       PetscInt j;
2910       for (j=0; j<ncols; j++) {
2911         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2912         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2913       }
2914     }
2915   }
2916 
2917   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2918   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2919   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2920   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 #undef __FUNCT__
2925 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2926 /*@C
2927    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2928    (the default parallel PETSc format).
2929 
2930    Collective on MPI_Comm
2931 
2932    Input Parameters:
2933 +  B - the matrix
2934 .  bs - the block size
2935 .  i - the indices into j for the start of each local row (starts with zero)
2936 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2937 -  v - optional values in the matrix
2938 
2939    Level: developer
2940 
2941    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2942    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2943    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2944    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2945    block column and the second index is over columns within a block.
2946 
2947 .keywords: matrix, aij, compressed row, sparse, parallel
2948 
2949 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2950 @*/
2951 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2952 {
2953   PetscErrorCode ierr;
2954 
2955   PetscFunctionBegin;
2956   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2957   PetscValidType(B,1);
2958   PetscValidLogicalCollectiveInt(B,bs,2);
2959   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2965 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2966 {
2967   Mat_MPIBAIJ    *b;
2968   PetscErrorCode ierr;
2969   PetscInt       i;
2970 
2971   PetscFunctionBegin;
2972   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2973   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2974   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2975   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2976 
2977   if (d_nnz) {
2978     for (i=0; i<B->rmap->n/bs; i++) {
2979       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2980     }
2981   }
2982   if (o_nnz) {
2983     for (i=0; i<B->rmap->n/bs; i++) {
2984       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2985     }
2986   }
2987 
2988   b      = (Mat_MPIBAIJ*)B->data;
2989   b->bs2 = bs*bs;
2990   b->mbs = B->rmap->n/bs;
2991   b->nbs = B->cmap->n/bs;
2992   b->Mbs = B->rmap->N/bs;
2993   b->Nbs = B->cmap->N/bs;
2994 
2995   for (i=0; i<=b->size; i++) {
2996     b->rangebs[i] = B->rmap->range[i]/bs;
2997   }
2998   b->rstartbs = B->rmap->rstart/bs;
2999   b->rendbs   = B->rmap->rend/bs;
3000   b->cstartbs = B->cmap->rstart/bs;
3001   b->cendbs   = B->cmap->rend/bs;
3002 
3003 #if defined(PETSC_USE_CTABLE)
3004   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3005 #else
3006   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3007 #endif
3008   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3009   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3010   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3011 
3012   /* Because the B will have been resized we simply destroy it and create a new one each time */
3013   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3014   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3015   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3016   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3017   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3018 
3019   if (!B->preallocated) {
3020     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3021     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3022     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3023     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3024     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3025   }
3026 
3027   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3028   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3029   B->preallocated  = PETSC_TRUE;
3030   B->was_assembled = PETSC_FALSE;
3031   B->assembled     = PETSC_FALSE;
3032   PetscFunctionReturn(0);
3033 }
3034 
3035 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3036 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3037 
3038 #undef __FUNCT__
3039 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3040 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3041 {
3042   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3043   PetscErrorCode ierr;
3044   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3045   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3046   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3047 
3048   PetscFunctionBegin;
3049   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
3050   ii[0] = 0;
3051   for (i=0; i<M; i++) {
3052     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3053     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3054     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3055     /* remove one from count of matrix has diagonal */
3056     for (j=id[i]; j<id[i+1]; j++) {
3057       if (jd[j] == i) {ii[i+1]--;break;}
3058     }
3059   }
3060   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
3061   cnt  = 0;
3062   for (i=0; i<M; i++) {
3063     for (j=io[i]; j<io[i+1]; j++) {
3064       if (garray[jo[j]] > rstart) break;
3065       jj[cnt++] = garray[jo[j]];
3066     }
3067     for (k=id[i]; k<id[i+1]; k++) {
3068       if (jd[k] != i) {
3069         jj[cnt++] = rstart + jd[k];
3070       }
3071     }
3072     for (; j<io[i+1]; j++) {
3073       jj[cnt++] = garray[jo[j]];
3074     }
3075   }
3076   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3077   PetscFunctionReturn(0);
3078 }
3079 
3080 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3081 
3082 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3083 
3084 #undef __FUNCT__
3085 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3086 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3087 {
3088   PetscErrorCode ierr;
3089   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3090   Mat            B;
3091   Mat_MPIAIJ     *b;
3092 
3093   PetscFunctionBegin;
3094   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3095 
3096   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3097   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3098   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3099   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3100   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3101   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3102   b    = (Mat_MPIAIJ*) B->data;
3103 
3104   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3105   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3106   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3107   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3108   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3109   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   if (reuse == MAT_INPLACE_MATRIX) {
3114     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3115   } else {
3116    *newmat = B;
3117   }
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 /*MC
3122    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3123 
3124    Options Database Keys:
3125 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3126 . -mat_block_size <bs> - set the blocksize used to store the matrix
3127 - -mat_use_hash_table <fact>
3128 
3129   Level: beginner
3130 
3131 .seealso: MatCreateMPIBAIJ
3132 M*/
3133 
3134 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3135 
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatCreate_MPIBAIJ"
3138 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3139 {
3140   Mat_MPIBAIJ    *b;
3141   PetscErrorCode ierr;
3142   PetscBool      flg = PETSC_FALSE;
3143 
3144   PetscFunctionBegin;
3145   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3146   B->data = (void*)b;
3147 
3148   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3149   B->assembled = PETSC_FALSE;
3150 
3151   B->insertmode = NOT_SET_VALUES;
3152   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3153   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3154 
3155   /* build local table of row and column ownerships */
3156   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3157 
3158   /* build cache for off array entries formed */
3159   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3160 
3161   b->donotstash  = PETSC_FALSE;
3162   b->colmap      = NULL;
3163   b->garray      = NULL;
3164   b->roworiented = PETSC_TRUE;
3165 
3166   /* stuff used in block assembly */
3167   b->barray = 0;
3168 
3169   /* stuff used for matrix vector multiply */
3170   b->lvec  = 0;
3171   b->Mvctx = 0;
3172 
3173   /* stuff for MatGetRow() */
3174   b->rowindices   = 0;
3175   b->rowvalues    = 0;
3176   b->getrowactive = PETSC_FALSE;
3177 
3178   /* hash table stuff */
3179   b->ht           = 0;
3180   b->hd           = 0;
3181   b->ht_size      = 0;
3182   b->ht_flag      = PETSC_FALSE;
3183   b->ht_fact      = 0;
3184   b->ht_total_ct  = 0;
3185   b->ht_insert_ct = 0;
3186 
3187   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3188   b->ijonly = PETSC_FALSE;
3189 
3190 
3191   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3192   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3193   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3194 #if defined(PETSC_HAVE_HYPRE)
3195   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3196 #endif
3197   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3198   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3199   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3200   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3201   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3202   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3203   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3204 
3205   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3206   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3207   if (flg) {
3208     PetscReal fact = 1.39;
3209     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3210     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3211     if (fact <= 1.0) fact = 1.39;
3212     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3213     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3214   }
3215   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 /*MC
3220    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3221 
3222    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3223    and MATMPIBAIJ otherwise.
3224 
3225    Options Database Keys:
3226 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3227 
3228   Level: beginner
3229 
3230 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3231 M*/
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3235 /*@C
3236    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3237    (block compressed row).  For good matrix assembly performance
3238    the user should preallocate the matrix storage by setting the parameters
3239    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3240    performance can be increased by more than a factor of 50.
3241 
3242    Collective on Mat
3243 
3244    Input Parameters:
3245 +  B - the matrix
3246 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3247           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3248 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3249            submatrix  (same for all local rows)
3250 .  d_nnz - array containing the number of block nonzeros in the various block rows
3251            of the in diagonal portion of the local (possibly different for each block
3252            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3253            set it even if it is zero.
3254 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3255            submatrix (same for all local rows).
3256 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3257            off-diagonal portion of the local submatrix (possibly different for
3258            each block row) or NULL.
3259 
3260    If the *_nnz parameter is given then the *_nz parameter is ignored
3261 
3262    Options Database Keys:
3263 +   -mat_block_size - size of the blocks to use
3264 -   -mat_use_hash_table <fact>
3265 
3266    Notes:
3267    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3268    than it must be used on all processors that share the object for that argument.
3269 
3270    Storage Information:
3271    For a square global matrix we define each processor's diagonal portion
3272    to be its local rows and the corresponding columns (a square submatrix);
3273    each processor's off-diagonal portion encompasses the remainder of the
3274    local matrix (a rectangular submatrix).
3275 
3276    The user can specify preallocated storage for the diagonal part of
3277    the local submatrix with either d_nz or d_nnz (not both).  Set
3278    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3279    memory allocation.  Likewise, specify preallocated storage for the
3280    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3281 
3282    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3283    the figure below we depict these three local rows and all columns (0-11).
3284 
3285 .vb
3286            0 1 2 3 4 5 6 7 8 9 10 11
3287           --------------------------
3288    row 3  |o o o d d d o o o o  o  o
3289    row 4  |o o o d d d o o o o  o  o
3290    row 5  |o o o d d d o o o o  o  o
3291           --------------------------
3292 .ve
3293 
3294    Thus, any entries in the d locations are stored in the d (diagonal)
3295    submatrix, and any entries in the o locations are stored in the
3296    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3297    stored simply in the MATSEQBAIJ format for compressed row storage.
3298 
3299    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3300    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3301    In general, for PDE problems in which most nonzeros are near the diagonal,
3302    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3303    or you will get TERRIBLE performance; see the users' manual chapter on
3304    matrices.
3305 
3306    You can call MatGetInfo() to get information on how effective the preallocation was;
3307    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3308    You can also run with the option -info and look for messages with the string
3309    malloc in them to see if additional memory allocation was needed.
3310 
3311    Level: intermediate
3312 
3313 .keywords: matrix, block, aij, compressed row, sparse, parallel
3314 
3315 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3316 @*/
3317 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3318 {
3319   PetscErrorCode ierr;
3320 
3321   PetscFunctionBegin;
3322   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3323   PetscValidType(B,1);
3324   PetscValidLogicalCollectiveInt(B,bs,2);
3325   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3326   PetscFunctionReturn(0);
3327 }
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatCreateBAIJ"
3331 /*@C
3332    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3333    (block compressed row).  For good matrix assembly performance
3334    the user should preallocate the matrix storage by setting the parameters
3335    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3336    performance can be increased by more than a factor of 50.
3337 
3338    Collective on MPI_Comm
3339 
3340    Input Parameters:
3341 +  comm - MPI communicator
3342 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3343           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3344 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3345            This value should be the same as the local size used in creating the
3346            y vector for the matrix-vector product y = Ax.
3347 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3348            This value should be the same as the local size used in creating the
3349            x vector for the matrix-vector product y = Ax.
3350 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3351 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3352 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3353            submatrix  (same for all local rows)
3354 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3355            of the in diagonal portion of the local (possibly different for each block
3356            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3357            and set it even if it is zero.
3358 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3359            submatrix (same for all local rows).
3360 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3361            off-diagonal portion of the local submatrix (possibly different for
3362            each block row) or NULL.
3363 
3364    Output Parameter:
3365 .  A - the matrix
3366 
3367    Options Database Keys:
3368 +   -mat_block_size - size of the blocks to use
3369 -   -mat_use_hash_table <fact>
3370 
3371    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3372    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3373    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3374 
3375    Notes:
3376    If the *_nnz parameter is given then the *_nz parameter is ignored
3377 
3378    A nonzero block is any block that as 1 or more nonzeros in it
3379 
3380    The user MUST specify either the local or global matrix dimensions
3381    (possibly both).
3382 
3383    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3384    than it must be used on all processors that share the object for that argument.
3385 
3386    Storage Information:
3387    For a square global matrix we define each processor's diagonal portion
3388    to be its local rows and the corresponding columns (a square submatrix);
3389    each processor's off-diagonal portion encompasses the remainder of the
3390    local matrix (a rectangular submatrix).
3391 
3392    The user can specify preallocated storage for the diagonal part of
3393    the local submatrix with either d_nz or d_nnz (not both).  Set
3394    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3395    memory allocation.  Likewise, specify preallocated storage for the
3396    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3397 
3398    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3399    the figure below we depict these three local rows and all columns (0-11).
3400 
3401 .vb
3402            0 1 2 3 4 5 6 7 8 9 10 11
3403           --------------------------
3404    row 3  |o o o d d d o o o o  o  o
3405    row 4  |o o o d d d o o o o  o  o
3406    row 5  |o o o d d d o o o o  o  o
3407           --------------------------
3408 .ve
3409 
3410    Thus, any entries in the d locations are stored in the d (diagonal)
3411    submatrix, and any entries in the o locations are stored in the
3412    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3413    stored simply in the MATSEQBAIJ format for compressed row storage.
3414 
3415    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3416    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3417    In general, for PDE problems in which most nonzeros are near the diagonal,
3418    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3419    or you will get TERRIBLE performance; see the users' manual chapter on
3420    matrices.
3421 
3422    Level: intermediate
3423 
3424 .keywords: matrix, block, aij, compressed row, sparse, parallel
3425 
3426 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3427 @*/
3428 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3429 {
3430   PetscErrorCode ierr;
3431   PetscMPIInt    size;
3432 
3433   PetscFunctionBegin;
3434   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3435   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3436   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3437   if (size > 1) {
3438     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3439     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3440   } else {
3441     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3442     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3443   }
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 #undef __FUNCT__
3448 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3449 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3450 {
3451   Mat            mat;
3452   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3453   PetscErrorCode ierr;
3454   PetscInt       len=0;
3455 
3456   PetscFunctionBegin;
3457   *newmat = 0;
3458   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3459   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3460   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3461   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3462 
3463   mat->factortype   = matin->factortype;
3464   mat->preallocated = PETSC_TRUE;
3465   mat->assembled    = PETSC_TRUE;
3466   mat->insertmode   = NOT_SET_VALUES;
3467 
3468   a             = (Mat_MPIBAIJ*)mat->data;
3469   mat->rmap->bs = matin->rmap->bs;
3470   a->bs2        = oldmat->bs2;
3471   a->mbs        = oldmat->mbs;
3472   a->nbs        = oldmat->nbs;
3473   a->Mbs        = oldmat->Mbs;
3474   a->Nbs        = oldmat->Nbs;
3475 
3476   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3477   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3478 
3479   a->size         = oldmat->size;
3480   a->rank         = oldmat->rank;
3481   a->donotstash   = oldmat->donotstash;
3482   a->roworiented  = oldmat->roworiented;
3483   a->rowindices   = 0;
3484   a->rowvalues    = 0;
3485   a->getrowactive = PETSC_FALSE;
3486   a->barray       = 0;
3487   a->rstartbs     = oldmat->rstartbs;
3488   a->rendbs       = oldmat->rendbs;
3489   a->cstartbs     = oldmat->cstartbs;
3490   a->cendbs       = oldmat->cendbs;
3491 
3492   /* hash table stuff */
3493   a->ht           = 0;
3494   a->hd           = 0;
3495   a->ht_size      = 0;
3496   a->ht_flag      = oldmat->ht_flag;
3497   a->ht_fact      = oldmat->ht_fact;
3498   a->ht_total_ct  = 0;
3499   a->ht_insert_ct = 0;
3500 
3501   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3502   if (oldmat->colmap) {
3503 #if defined(PETSC_USE_CTABLE)
3504     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3505 #else
3506     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3507     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3508     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3509 #endif
3510   } else a->colmap = 0;
3511 
3512   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3513     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3514     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3515     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3516   } else a->garray = 0;
3517 
3518   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3519   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3520   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3521   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3522   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3523 
3524   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3525   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3526   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3527   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3528   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3529   *newmat = mat;
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatLoad_MPIBAIJ"
3535 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3536 {
3537   PetscErrorCode ierr;
3538   int            fd;
3539   PetscInt       i,nz,j,rstart,rend;
3540   PetscScalar    *vals,*buf;
3541   MPI_Comm       comm;
3542   MPI_Status     status;
3543   PetscMPIInt    rank,size,maxnz;
3544   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3545   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3546   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3547   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3548   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3549   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3550 
3551   PetscFunctionBegin;
3552   /* force binary viewer to load .info file if it has not yet done so */
3553   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3554   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3555   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3556   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3557   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3558   if (bs < 0) bs = 1;
3559 
3560   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3561   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3562   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3563   if (!rank) {
3564     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3565     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3566     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3567   }
3568   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3569   M    = header[1]; N = header[2];
3570 
3571   /* If global sizes are set, check if they are consistent with that given in the file */
3572   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3573   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3574 
3575   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3576 
3577   /*
3578      This code adds extra rows to make sure the number of rows is
3579      divisible by the blocksize
3580   */
3581   Mbs        = M/bs;
3582   extra_rows = bs - M + bs*Mbs;
3583   if (extra_rows == bs) extra_rows = 0;
3584   else                  Mbs++;
3585   if (extra_rows && !rank) {
3586     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3587   }
3588 
3589   /* determine ownership of all rows */
3590   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3591     mbs = Mbs/size + ((Mbs % size) > rank);
3592     m   = mbs*bs;
3593   } else { /* User set */
3594     m   = newmat->rmap->n;
3595     mbs = m/bs;
3596   }
3597   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3598   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3599 
3600   /* process 0 needs enough room for process with most rows */
3601   if (!rank) {
3602     mmax = rowners[1];
3603     for (i=2; i<=size; i++) {
3604       mmax = PetscMax(mmax,rowners[i]);
3605     }
3606     mmax*=bs;
3607   } else mmax = -1;             /* unused, but compiler warns anyway */
3608 
3609   rowners[0] = 0;
3610   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3611   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3612   rstart = rowners[rank];
3613   rend   = rowners[rank+1];
3614 
3615   /* distribute row lengths to all processors */
3616   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3617   if (!rank) {
3618     mend = m;
3619     if (size == 1) mend = mend - extra_rows;
3620     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3621     for (j=mend; j<m; j++) locrowlens[j] = 1;
3622     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3623     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3624     for (j=0; j<m; j++) {
3625       procsnz[0] += locrowlens[j];
3626     }
3627     for (i=1; i<size; i++) {
3628       mend = browners[i+1] - browners[i];
3629       if (i == size-1) mend = mend - extra_rows;
3630       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3631       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3632       /* calculate the number of nonzeros on each processor */
3633       for (j=0; j<browners[i+1]-browners[i]; j++) {
3634         procsnz[i] += rowlengths[j];
3635       }
3636       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3637     }
3638     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3639   } else {
3640     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3641   }
3642 
3643   if (!rank) {
3644     /* determine max buffer needed and allocate it */
3645     maxnz = procsnz[0];
3646     for (i=1; i<size; i++) {
3647       maxnz = PetscMax(maxnz,procsnz[i]);
3648     }
3649     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3650 
3651     /* read in my part of the matrix column indices  */
3652     nz     = procsnz[0];
3653     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3654     mycols = ibuf;
3655     if (size == 1) nz -= extra_rows;
3656     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3657     if (size == 1) {
3658       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3659     }
3660 
3661     /* read in every ones (except the last) and ship off */
3662     for (i=1; i<size-1; i++) {
3663       nz   = procsnz[i];
3664       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3665       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3666     }
3667     /* read in the stuff for the last proc */
3668     if (size != 1) {
3669       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3670       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3671       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3672       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3673     }
3674     ierr = PetscFree(cols);CHKERRQ(ierr);
3675   } else {
3676     /* determine buffer space needed for message */
3677     nz = 0;
3678     for (i=0; i<m; i++) {
3679       nz += locrowlens[i];
3680     }
3681     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3682     mycols = ibuf;
3683     /* receive message of column indices*/
3684     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3685     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3686     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3687   }
3688 
3689   /* loop over local rows, determining number of off diagonal entries */
3690   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3691   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3692   rowcount = 0; nzcount = 0;
3693   for (i=0; i<mbs; i++) {
3694     dcount  = 0;
3695     odcount = 0;
3696     for (j=0; j<bs; j++) {
3697       kmax = locrowlens[rowcount];
3698       for (k=0; k<kmax; k++) {
3699         tmp = mycols[nzcount++]/bs;
3700         if (!mask[tmp]) {
3701           mask[tmp] = 1;
3702           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3703           else masked1[dcount++] = tmp;
3704         }
3705       }
3706       rowcount++;
3707     }
3708 
3709     dlens[i]  = dcount;
3710     odlens[i] = odcount;
3711 
3712     /* zero out the mask elements we set */
3713     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3714     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3715   }
3716 
3717   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3718   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3719 
3720   if (!rank) {
3721     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3722     /* read in my part of the matrix numerical values  */
3723     nz     = procsnz[0];
3724     vals   = buf;
3725     mycols = ibuf;
3726     if (size == 1) nz -= extra_rows;
3727     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3728     if (size == 1) {
3729       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3730     }
3731 
3732     /* insert into matrix */
3733     jj = rstart*bs;
3734     for (i=0; i<m; i++) {
3735       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3736       mycols += locrowlens[i];
3737       vals   += locrowlens[i];
3738       jj++;
3739     }
3740     /* read in other processors (except the last one) and ship out */
3741     for (i=1; i<size-1; i++) {
3742       nz   = procsnz[i];
3743       vals = buf;
3744       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3745       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3746     }
3747     /* the last proc */
3748     if (size != 1) {
3749       nz   = procsnz[i] - extra_rows;
3750       vals = buf;
3751       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3752       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3753       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3754     }
3755     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3756   } else {
3757     /* receive numeric values */
3758     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3759 
3760     /* receive message of values*/
3761     vals   = buf;
3762     mycols = ibuf;
3763     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3764 
3765     /* insert into matrix */
3766     jj = rstart*bs;
3767     for (i=0; i<m; i++) {
3768       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3769       mycols += locrowlens[i];
3770       vals   += locrowlens[i];
3771       jj++;
3772     }
3773   }
3774   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3775   ierr = PetscFree(buf);CHKERRQ(ierr);
3776   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3777   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3778   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3779   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3780   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3781   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3787 /*@
3788    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3789 
3790    Input Parameters:
3791 .  mat  - the matrix
3792 .  fact - factor
3793 
3794    Not Collective, each process can use a different factor
3795 
3796    Level: advanced
3797 
3798   Notes:
3799    This can also be set by the command line option: -mat_use_hash_table <fact>
3800 
3801 .keywords: matrix, hashtable, factor, HT
3802 
3803 .seealso: MatSetOption()
3804 @*/
3805 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3806 {
3807   PetscErrorCode ierr;
3808 
3809   PetscFunctionBegin;
3810   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3811   PetscFunctionReturn(0);
3812 }
3813 
3814 #undef __FUNCT__
3815 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3816 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3817 {
3818   Mat_MPIBAIJ *baij;
3819 
3820   PetscFunctionBegin;
3821   baij          = (Mat_MPIBAIJ*)mat->data;
3822   baij->ht_fact = fact;
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 #undef __FUNCT__
3827 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3828 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3829 {
3830   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3831 
3832   PetscFunctionBegin;
3833   if (Ad)     *Ad     = a->A;
3834   if (Ao)     *Ao     = a->B;
3835   if (colmap) *colmap = a->garray;
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*
3840     Special version for direct calls from Fortran (to eliminate two function call overheads
3841 */
3842 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3843 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3844 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3845 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3846 #endif
3847 
3848 #undef __FUNCT__
3849 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3850 /*@C
3851   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3852 
3853   Collective on Mat
3854 
3855   Input Parameters:
3856 + mat - the matrix
3857 . min - number of input rows
3858 . im - input rows
3859 . nin - number of input columns
3860 . in - input columns
3861 . v - numerical values input
3862 - addvin - INSERT_VALUES or ADD_VALUES
3863 
3864   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3865 
3866   Level: advanced
3867 
3868 .seealso:   MatSetValuesBlocked()
3869 @*/
3870 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3871 {
3872   /* convert input arguments to C version */
3873   Mat        mat  = *matin;
3874   PetscInt   m    = *min, n = *nin;
3875   InsertMode addv = *addvin;
3876 
3877   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3878   const MatScalar *value;
3879   MatScalar       *barray     = baij->barray;
3880   PetscBool       roworiented = baij->roworiented;
3881   PetscErrorCode  ierr;
3882   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3883   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3884   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3885 
3886   PetscFunctionBegin;
3887   /* tasks normally handled by MatSetValuesBlocked() */
3888   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3889 #if defined(PETSC_USE_DEBUG)
3890   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3891   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3892 #endif
3893   if (mat->assembled) {
3894     mat->was_assembled = PETSC_TRUE;
3895     mat->assembled     = PETSC_FALSE;
3896   }
3897   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3898 
3899 
3900   if (!barray) {
3901     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3902     baij->barray = barray;
3903   }
3904 
3905   if (roworiented) stepval = (n-1)*bs;
3906   else stepval = (m-1)*bs;
3907 
3908   for (i=0; i<m; i++) {
3909     if (im[i] < 0) continue;
3910 #if defined(PETSC_USE_DEBUG)
3911     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3912 #endif
3913     if (im[i] >= rstart && im[i] < rend) {
3914       row = im[i] - rstart;
3915       for (j=0; j<n; j++) {
3916         /* If NumCol = 1 then a copy is not required */
3917         if ((roworiented) && (n == 1)) {
3918           barray = (MatScalar*)v + i*bs2;
3919         } else if ((!roworiented) && (m == 1)) {
3920           barray = (MatScalar*)v + j*bs2;
3921         } else { /* Here a copy is required */
3922           if (roworiented) {
3923             value = v + i*(stepval+bs)*bs + j*bs;
3924           } else {
3925             value = v + j*(stepval+bs)*bs + i*bs;
3926           }
3927           for (ii=0; ii<bs; ii++,value+=stepval) {
3928             for (jj=0; jj<bs; jj++) {
3929               *barray++ = *value++;
3930             }
3931           }
3932           barray -=bs2;
3933         }
3934 
3935         if (in[j] >= cstart && in[j] < cend) {
3936           col  = in[j] - cstart;
3937           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3938         } else if (in[j] < 0) continue;
3939 #if defined(PETSC_USE_DEBUG)
3940         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3941 #endif
3942         else {
3943           if (mat->was_assembled) {
3944             if (!baij->colmap) {
3945               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3946             }
3947 
3948 #if defined(PETSC_USE_DEBUG)
3949 #if defined(PETSC_USE_CTABLE)
3950             { PetscInt data;
3951               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3952               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3953             }
3954 #else
3955             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3956 #endif
3957 #endif
3958 #if defined(PETSC_USE_CTABLE)
3959             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3960             col  = (col - 1)/bs;
3961 #else
3962             col = (baij->colmap[in[j]] - 1)/bs;
3963 #endif
3964             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3965               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3966               col  =  in[j];
3967             }
3968           } else col = in[j];
3969           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3970         }
3971       }
3972     } else {
3973       if (!baij->donotstash) {
3974         if (roworiented) {
3975           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3976         } else {
3977           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3978         }
3979       }
3980     }
3981   }
3982 
3983   /* task normally handled by MatSetValuesBlocked() */
3984   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3985   PetscFunctionReturn(0);
3986 }
3987 
3988 #undef __FUNCT__
3989 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3990 /*@
3991      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3992          CSR format the local rows.
3993 
3994    Collective on MPI_Comm
3995 
3996    Input Parameters:
3997 +  comm - MPI communicator
3998 .  bs - the block size, only a block size of 1 is supported
3999 .  m - number of local rows (Cannot be PETSC_DECIDE)
4000 .  n - This value should be the same as the local size used in creating the
4001        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4002        calculated if N is given) For square matrices n is almost always m.
4003 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4004 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4005 .   i - row indices
4006 .   j - column indices
4007 -   a - matrix values
4008 
4009    Output Parameter:
4010 .   mat - the matrix
4011 
4012    Level: intermediate
4013 
4014    Notes:
4015        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4016      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4017      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4018 
4019      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
4020      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
4021      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
4022      with column-major ordering within blocks.
4023 
4024        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4025 
4026 .keywords: matrix, aij, compressed row, sparse, parallel
4027 
4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4029           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4030 @*/
4031 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4032 {
4033   PetscErrorCode ierr;
4034 
4035   PetscFunctionBegin;
4036   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4037   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4038   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4039   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4040   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4041   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4042   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4043   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
4046 
4047 #undef __FUNCT__
4048 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
4049 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4050 {
4051   PetscErrorCode ierr;
4052   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
4053   PetscInt       *indx;
4054   PetscScalar    *values;
4055 
4056   PetscFunctionBegin;
4057   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4058   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4059     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
4060     PetscInt       *dnz,*onz,sum,mbs,Nbs;
4061     PetscInt       *bindx,rmax=a->rmax,j;
4062 
4063     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4064     mbs = m/bs; Nbs = N/cbs;
4065     if (n == PETSC_DECIDE) {
4066       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
4067     }
4068     /* Check sum(n) = Nbs */
4069     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4070     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
4071 
4072     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4073     rstart -= mbs;
4074 
4075     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
4076     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
4077     for (i=0; i<mbs; i++) {
4078       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
4079       nnz = nnz/bs;
4080       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
4081       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
4082       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
4083     }
4084     ierr = PetscFree(bindx);CHKERRQ(ierr);
4085 
4086     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4087     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4088     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4089     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
4090     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
4091     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4092   }
4093 
4094   /* numeric phase */
4095   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4096   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4097 
4098   for (i=0; i<m; i++) {
4099     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4100     Ii   = i + rstart;
4101     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4102     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4103   }
4104   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4105   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4106   PetscFunctionReturn(0);
4107 }
4108