xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3aa2d9e3a17455108487be9a174c0f069d9014ad)

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

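/*
   Added note (not in the original source): the index mapping above converts
   a local column index of the off-diagonal part B into a global column
   index.  idxb[i]/bs is the local block column, garray[] translates it to
   the global block column, and idxb[i]%bs restores the offset within the
   block.  For example, with bs = 2 and garray = {3,7}, idxb[i] = 3 lies in
   local block 1 with offset 1, giving global column 2*7 + 1 = 15.
*/
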
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of block columns of the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

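/*
   A minimal usage sketch of the colmap built above (illustrative only;
   gcol is a hypothetical global point column).  Entries store the local
   starting point column of each block shifted by one, so zero means the
   block column is not present in B:

     col = baij->colmap[gcol/bs] - 1;      (PetscTableFind() is used instead
                                            when PETSC_USE_CTABLE is defined)
     if (col >= 0) col += gcol % bs;       (point offset within the block)

   MatSetValues_MPIBAIJ() below performs exactly this translation.
*/
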
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

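/*
   Added note (not in the original source): both macros above locate the
   block column bcol within one sorted block row by a hybrid search, i.e.
   binary search until the window holds at most three entries, then a short
   linear scan.  On a miss, when insertion is allowed, the row is grown via
   MatSeqXAIJReallocateAIJ(), all later block entries (and their bs2 values)
   are shifted up one slot, the new block is zeroed, and the scalar is
   stored at offset bs*cidx + ridx, i.e. each block is held in column-major
   order.
*/
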
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

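/*
   Added note (not in the original source): in the copy marked "Here a copy
   is required" above, v[] holds m x n logical blocks of size bs x bs.  In
   the row-oriented case block (i,j) starts at v + (i*(stepval+bs) + j)*bs
   with stepval = (n-1)*bs, and consecutive rows of that block are
   stepval+bs scalars apart; the loop gathers them into the contiguous
   bs2-sized buffer barray that MatSetValuesBlocked_SeqBAIJ_Inlined()
   expects.
*/
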
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
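
/*
   Worked example (added note): HASH() takes the fractional part of
   key*HASH_KEY (the golden-ratio conjugate) and scales it to the table
   size.  With size = 11 and key = 7:
     tmp  = 7*0.6180339887 = 4.3262...
     frac = tmp - (PetscInt)tmp = 0.3262...
     HASH = (PetscInt)(11*0.3262...) = 3
*/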
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* look up the block in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);
        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* look up the block in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);
        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

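/*
   For reference (added note), with A the local diagonal block and B the
   local off-diagonal block, MatNorm_MPIBAIJ() above computes
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), accumulated over A and B and
                     summed across processes,
     NORM_1:         max_j sum_i |a_ij|  (maximum column sum),
     NORM_INFINITY:  max_i sum_j |a_ij|  (maximum row sum).
*/
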
/*
  Creates and populates the hash table.
  This table is created only once; if new entries need to be added to the
  matrix, the hash table must be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

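/*
   A minimal lookup sketch (illustrative only) matching the linear-probing
   insertion above: starting from h1 = HASH(ht_size,key,tmp), probe forward
   with wraparound until the key is found; an empty slot proves the key was
   never inserted.

     for (k=0; k<ht_size; k++) {
       idx = (h1+k)%ht_size;
       if (HT[idx] == key) return HD[idx];
       if (!HT[idx]) break;
     }

   The MatSetValues_MPIBAIJ_HT() routines above implement the same scan as
   two loops, h1..size-1 and then 0..h1-1, without the modulo.
*/
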
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

906 
907 #undef __FUNCT__
908 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
909 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
910 {
911   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
912   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
913   PetscErrorCode ierr;
914   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
915   PetscInt       *row,*col;
916   PetscBool      r1,r2,r3,other_disassembled;
917   MatScalar      *val;
918   PetscMPIInt    n;
919 
920   PetscFunctionBegin;
921   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
922   if (!baij->donotstash && !mat->nooffprocentries) {
923     while (1) {
924       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
925       if (!flg) break;
926 
927       for (i=0; i<n;) {
928         /* Now identify the consecutive vals belonging to the same row */
929         for (j=i,rstart=row[j]; j<n; j++) {
930           if (row[j] != rstart) break;
931         }
932         if (j < n) ncols = j-i;
933         else       ncols = n-i;
934         /* Now assemble all these values with a single function call */
935         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
936         i    = j;
937       }
938     }
939     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
940     /* Now process the block-stash. Since the values are stashed column-oriented,
941        set the roworiented flag to column oriented, and after MatSetValues()
942        restore the original flags */
943     r1 = baij->roworiented;
944     r2 = a->roworiented;
945     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
946 
947     baij->roworiented = PETSC_FALSE;
948     a->roworiented    = PETSC_FALSE;
949 
950     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
951     while (1) {
952       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
953       if (!flg) break;
954 
955       for (i=0; i<n;) {
956         /* Now identify the consecutive vals belonging to the same row */
957         for (j=i,rstart=row[j]; j<n; j++) {
958           if (row[j] != rstart) break;
959         }
960         if (j < n) ncols = j-i;
961         else       ncols = n-i;
962         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
963         i    = j;
964       }
965     }
966     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
967 
968     baij->roworiented = r1;
969     a->roworiented    = r2;
970 
971     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
972   }
973 
974   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
975   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
976 
  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
     information needed to reconstruct them for each row from a block row. This does require more communication, but still no more
     than the communication needed for the nonzero values */
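  /* Added note: each block row first emits the point columns of B lying left
     of the diagonal block, then the columns of A (shifted by cstart), then
     the remaining columns of B; the PetscMemcpy() below then replicates that
     list bs-1 more times so every point row of the block row has its own
     copy. */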
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1354 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1355 {
1356   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360 #if defined(PETSC_USE_LOG)
1361   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1362 #endif
1363   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1364   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1365   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1366   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1367 #if defined(PETSC_USE_CTABLE)
1368   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1369 #else
1370   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1371 #endif
1372   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1373   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1374   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1375   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1376   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1377   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1378   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1379   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1380 
1381   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1382   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1390   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatMult_MPIBAIJ"
1396 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1397 {
1398   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400   PetscInt       nt;
1401 
1402   PetscFunctionBegin;
1403   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1404   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1405   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1406   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1407   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1408   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1409   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1410   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
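
/*
   The multiply above computes y = A_d*x_local + B_o*x_ghost, overlapping the
   ghost-value scatter with the diagonal-block product.  A minimal usage
   sketch (illustrative only, standard PETSc API):

      Vec x,y;
      ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);   now y holds the row sums of A
*/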
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1416 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1424   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1425   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1431 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1432 {
1433   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435   PetscBool      merged;
1436 
1437   PetscFunctionBegin;
1438   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1439   /* do nondiagonal part */
1440   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1441   if (!merged) {
1442     /* send it on its way */
1443     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1444     /* do local part */
1445     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1446     /* receive remote parts: note this assumes the values are not actually */
1447     /* inserted in yy until the next line */
1448     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1449   } else {
1450     /* do local part */
1451     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1452     /* send it on its way */
1453     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1454     /* the values were actually received in the Begin(), but this End() must still be called as a no-op */
1455     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1462 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1463 {
1464   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1465   PetscErrorCode ierr;
1466 
1467   PetscFunctionBegin;
1468   /* do nondiagonal part */
1469   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1470   /* send it on its way */
1471   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1472   /* do local part */
1473   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1474   /* receive remote parts: note this assumes the values are not actually */
1475   /* inserted in zz until the VecScatterEnd() below, which is true for the */
1476   /* current implementation but may not always hold. */
1477   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /*
1482   This only works correctly for square matrices where the subblock A->A is the
1483    diagonal block
1484 */
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1487 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1488 {
1489   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1490   PetscErrorCode ierr;
1491 
1492   PetscFunctionBegin;
1493   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1494   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatScale_MPIBAIJ"
1500 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1501 {
1502   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1503   PetscErrorCode ierr;
1504 
1505   PetscFunctionBegin;
1506   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1507   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1513 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1514 {
1515   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1516   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1517   PetscErrorCode ierr;
1518   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1519   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1520   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1521 
1522   PetscFunctionBegin;
1523   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1524   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1525   mat->getrowactive = PETSC_TRUE;
1526 
1527   if (!mat->rowvalues && (idx || v)) {
1528     /*
1529         allocate enough space to hold information from the longest row.
1530     */
1531     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1532     PetscInt    max = 1,mbs = mat->mbs,tmp;
1533     for (i=0; i<mbs; i++) {
1534       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1535       if (max < tmp) max = tmp;
1536     }
1537     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1538   }
1539   lrow = row - brstart;
1540 
1541   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1542   if (!v)   {pvA = 0; pvB = 0;}
1543   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1544   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1545   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1546   nztot = nzA + nzB;
1547 
1548   cmap = mat->garray;
1549   if (v  || idx) {
1550     if (nztot) {
1551       /* Sort by increasing column numbers, assuming A and B already sorted */
1552       PetscInt imark = -1;
1553       if (v) {
1554         *v = v_p = mat->rowvalues;
1555         for (i=0; i<nzB; i++) {
1556           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1557           else break;
1558         }
1559         imark = i;
1560         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1561         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1562       }
1563       if (idx) {
1564         *idx = idx_p = mat->rowindices;
1565         if (imark > -1) {
1566           for (i=0; i<imark; i++) {
1567             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1568           }
1569         } else {
1570           for (i=0; i<nzB; i++) {
1571             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1572             else break;
1573           }
1574           imark = i;
1575         }
1576         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1577     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1578       }
1579     } else {
1580       if (idx) *idx = 0;
1581       if (v)   *v   = 0;
1582     }
1583   }
1584   *nz  = nztot;
1585   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1586   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1592 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1593 {
1594   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1595 
1596   PetscFunctionBegin;
1597   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1598   baij->getrowactive = PETSC_FALSE;
1599   PetscFunctionReturn(0);
1600 }
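
/*
   Usage sketch for the two routines above through the public interface
   (illustrative only; rows must be owned locally, and every MatGetRow()
   must be paired with a MatRestoreRow()):

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use ncols, cols[], vals[] ...
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/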
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1604 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1605 {
1606   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1607   PetscErrorCode ierr;
1608 
1609   PetscFunctionBegin;
1610   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1611   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1617 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1618 {
1619   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1620   Mat            A  = a->A,B = a->B;
1621   PetscErrorCode ierr;
1622   PetscReal      isend[5],irecv[5];
1623 
1624   PetscFunctionBegin;
1625   info->block_size = (PetscReal)matin->rmap->bs;
1626 
1627   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1628 
1629   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1630   isend[3] = info->memory;  isend[4] = info->mallocs;
1631 
1632   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1633 
1634   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1635   isend[3] += info->memory;  isend[4] += info->mallocs;
1636 
1637   if (flag == MAT_LOCAL) {
1638     info->nz_used      = isend[0];
1639     info->nz_allocated = isend[1];
1640     info->nz_unneeded  = isend[2];
1641     info->memory       = isend[3];
1642     info->mallocs      = isend[4];
1643   } else if (flag == MAT_GLOBAL_MAX) {
1644     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1645 
1646     info->nz_used      = irecv[0];
1647     info->nz_allocated = irecv[1];
1648     info->nz_unneeded  = irecv[2];
1649     info->memory       = irecv[3];
1650     info->mallocs      = irecv[4];
1651   } else if (flag == MAT_GLOBAL_SUM) {
1652     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1653 
1654     info->nz_used      = irecv[0];
1655     info->nz_allocated = irecv[1];
1656     info->nz_unneeded  = irecv[2];
1657     info->memory       = irecv[3];
1658     info->mallocs      = irecv[4];
1659   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1660   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1661   info->fill_ratio_needed = 0;
1662   info->factor_mallocs    = 0;
1663   PetscFunctionReturn(0);
1664 }
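
/*
   Illustrative use of the routine above through the public interface,
   summing the local contributions over all processes:

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/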
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1668 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1669 {
1670   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1671   PetscErrorCode ierr;
1672 
1673   PetscFunctionBegin;
1674   switch (op) {
1675   case MAT_NEW_NONZERO_LOCATIONS:
1676   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1677   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1678   case MAT_KEEP_NONZERO_PATTERN:
1679   case MAT_NEW_NONZERO_LOCATION_ERR:
1680     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1681     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     a->roworiented = flg;
1685 
1686     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1687     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1688     break;
1689   case MAT_NEW_DIAGONALS:
1690     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1691     break;
1692   case MAT_IGNORE_OFF_PROC_ENTRIES:
1693     a->donotstash = flg;
1694     break;
1695   case MAT_USE_HASH_TABLE:
1696     a->ht_flag = flg;
1697     break;
1698   case MAT_SYMMETRIC:
1699   case MAT_STRUCTURALLY_SYMMETRIC:
1700   case MAT_HERMITIAN:
1701   case MAT_SYMMETRY_ETERNAL:
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     break;
1704   default:
1705     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1706   }
1707   PetscFunctionReturn(0);
1708 }
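
/*
   For example, the hash-table assembly path handled by MAT_USE_HASH_TABLE
   above is enabled with (illustrative):

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
*/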
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1712 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1713 {
1714   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1715   Mat_SeqBAIJ    *Aloc;
1716   Mat            B;
1717   PetscErrorCode ierr;
1718   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1719   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1720   MatScalar      *a;
1721 
1722   PetscFunctionBegin;
1723   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1724   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1725     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1726     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1727     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1728     /* Do not know preallocation information, but must set block size */
1729     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1730   } else {
1731     B = *matout;
1732   }
1733 
1734   /* copy over the A part */
1735   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1736   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1737   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1738 
1739   for (i=0; i<mbs; i++) {
1740     rvals[0] = bs*(baij->rstartbs + i);
1741     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1742     for (j=ai[i]; j<ai[i+1]; j++) {
1743       col = (baij->cstartbs+aj[j])*bs;
1744       for (k=0; k<bs; k++) {
1745         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1746 
1747         col++; a += bs;
1748       }
1749     }
1750   }
1751   /* copy over the B part */
1752   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1753   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1754   for (i=0; i<mbs; i++) {
1755     rvals[0] = bs*(baij->rstartbs + i);
1756     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1757     for (j=ai[i]; j<ai[i+1]; j++) {
1758       col = baij->garray[aj[j]]*bs;
1759       for (k=0; k<bs; k++) {
1760         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1761         col++;
1762         a += bs;
1763       }
1764     }
1765   }
1766   ierr = PetscFree(rvals);CHKERRQ(ierr);
1767   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1768   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1769 
1770   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1771   else {
1772     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1773   }
1774   PetscFunctionReturn(0);
1775 }
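
/*
   Illustrative use of the routine above, out-of-place and in-place
   (in-place requires a square matrix, as checked at the top):

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/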
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1779 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1780 {
1781   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1782   Mat            a     = baij->A,b = baij->B;
1783   PetscErrorCode ierr;
1784   PetscInt       s1,s2,s3;
1785 
1786   PetscFunctionBegin;
1787   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1788   if (rr) {
1789     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1790     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1791     /* Overlap communication with computation. */
1792     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1793   }
1794   if (ll) {
1795     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1796     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1797     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1798   }
1799   /* scale the diagonal block */
1800   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1801 
1802   if (rr) {
1803     /* Do a scatter end and then right scale the off-diagonal block */
1804     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1805     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1806   }
1807   PetscFunctionReturn(0);
1808 }
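
/*
   The routine above computes mat = diag(ll)*mat*diag(rr).  A minimal
   sketch (illustrative only):

      Vec l,r;
      ierr = MatCreateVecs(mat,&r,&l);CHKERRQ(ierr);
      ierr = VecSet(l,2.0);CHKERRQ(ierr);
      ierr = VecSet(r,0.5);CHKERRQ(ierr);
      ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/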
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1812 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1813 {
1814   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1815   PetscInt      *owners = A->rmap->range;
1816   PetscInt       n      = A->rmap->n;
1817   PetscSF        sf;
1818   PetscInt      *lrows;
1819   PetscSFNode   *rrows;
1820   PetscInt       r, p = 0, len = 0;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   /* Create SF where leaves are input rows and roots are owned rows */
1825   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1826   for (r = 0; r < n; ++r) lrows[r] = -1;
1827   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1828   for (r = 0; r < N; ++r) {
1829     const PetscInt idx   = rows[r];
1830     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1831     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1832       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1833     }
1834     if (A->nooffproczerorows) {
1835       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1836       lrows[len++] = idx - owners[p];
1837     } else {
1838       rrows[r].rank = p;
1839       rrows[r].index = rows[r] - owners[p];
1840     }
1841   }
1842   if (!A->nooffproczerorows) {
1843     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1844     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1845     /* Collect flags for rows to be zeroed */
1846     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1847     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1848     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1849     /* Compress and put in row numbers */
1850     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1851   }
1852   /* fix right hand side if needed */
1853   if (x && b) {
1854     const PetscScalar *xx;
1855     PetscScalar       *bb;
1856 
1857     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1858     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1859     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1860     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1861     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1862   }
1863 
1864   /* actually zap the local rows */
1865   /*
1866         Zero the required rows. If the "diagonal block" of the matrix
1867      is square and the user wishes to set the diagonal we use separate
1868      code so that MatSetValues() is not called for each diagonal allocating
1869      new memory, thus calling lots of mallocs and slowing things down.
1870 
1871   */
1872   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1873   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1874   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1875     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1876   } else if (diag != 0.0) {
1877     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1878     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1879        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1880     for (r = 0; r < len; ++r) {
1881       const PetscInt row = lrows[r] + A->rmap->rstart;
1882       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1883     }
1884     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1886   } else {
1887     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1888   }
1889   ierr = PetscFree(lrows);CHKERRQ(ierr);
1890 
1891   /* only change matrix nonzero state if pattern was allowed to be changed */
1892   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1893     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1894     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1895   }
1896   PetscFunctionReturn(0);
1897 }
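
/*
   Illustrative use of the public interface for the routine above: zero the
   rows listed in rows[] (global indices, possibly off-process), put 1.0 on
   the diagonal, and fix up the right hand side b from a solution vector x:

      ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/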
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1901 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1902 {
1903   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1904   PetscErrorCode    ierr;
1905   PetscMPIInt       n = A->rmap->n;
1906   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1907   PetscInt          *lrows,*owners = A->rmap->range;
1908   PetscSFNode       *rrows;
1909   PetscSF           sf;
1910   const PetscScalar *xx;
1911   PetscScalar       *bb,*mask;
1912   Vec               xmask,lmask;
1913   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1914   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1915   PetscScalar       *aa;
1916 
1917   PetscFunctionBegin;
1918   /* Create SF where leaves are input rows and roots are owned rows */
1919   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1920   for (r = 0; r < n; ++r) lrows[r] = -1;
1921   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1922   for (r = 0; r < N; ++r) {
1923     const PetscInt idx   = rows[r];
1924     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1925     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1926       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1927     }
1928     rrows[r].rank  = p;
1929     rrows[r].index = rows[r] - owners[p];
1930   }
1931   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1932   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1933   /* Collect flags for rows to be zeroed */
1934   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1935   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1936   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1937   /* Compress and put in row numbers */
1938   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1939   /* zero diagonal part of matrix */
1940   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1941   /* handle off diagonal part of matrix */
1942   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1943   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1944   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1945   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1946   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1947   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1948   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1949   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1950   if (x) {
1951     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1952     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1953     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1954     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1955   }
1956   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1957   /* remove zeroed rows of off diagonal matrix */
1958   for (i = 0; i < len; ++i) {
1959     row   = lrows[i];
1960     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1961     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1962     for (k = 0; k < count; ++k) {
1963       aa[0] = 0.0;
1964       aa   += bs;
1965     }
1966   }
1967   /* loop over all elements of off process part of matrix zeroing removed columns*/
1968   for (i = 0; i < l->B->rmap->N; ++i) {
1969     row = i/bs;
1970     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1971       for (k = 0; k < bs; ++k) {
1972         col = bs*baij->j[j] + k;
1973         if (PetscAbsScalar(mask[col])) {
1974           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1975           if (b) bb[i] -= aa[0]*xx[col];
1976           aa[0] = 0.0;
1977         }
1978       }
1979     }
1980   }
1981   if (x) {
1982     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1983     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1984   }
1985   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1986   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1987   ierr = PetscFree(lrows);CHKERRQ(ierr);
1988 
1989   /* only change matrix nonzero state if pattern was allowed to be changed */
1990   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1991     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1992     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1999 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
2000 {
2001   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatEqual_MPIBAIJ"
2013 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
2014 {
2015   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
2016   Mat            a,b,c,d;
2017   PetscBool      flg;
2018   PetscErrorCode ierr;
2019 
2020   PetscFunctionBegin;
2021   a = matA->A; b = matA->B;
2022   c = matB->A; d = matB->B;
2023 
2024   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2025   if (flg) {
2026     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2027   }
2028   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatCopy_MPIBAIJ"
2034 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
2035 {
2036   PetscErrorCode ierr;
2037   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2038   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2039 
2040   PetscFunctionBegin;
2041   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2042   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2043     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2044   } else {
2045     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2046     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatSetUp_MPIBAIJ"
2053 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
2054 {
2055   PetscErrorCode ierr;
2056 
2057   PetscFunctionBegin;
2058   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
2064 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2065 {
2066   PetscErrorCode ierr;
2067   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
2068   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2069   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatAXPY_MPIBAIJ"
2078 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2079 {
2080   PetscErrorCode ierr;
2081   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
2082   PetscBLASInt   bnz,one=1;
2083   Mat_SeqBAIJ    *x,*y;
2084 
2085   PetscFunctionBegin;
2086   if (str == SAME_NONZERO_PATTERN) {
2087     PetscScalar alpha = a;
2088     x    = (Mat_SeqBAIJ*)xx->A->data;
2089     y    = (Mat_SeqBAIJ*)yy->A->data;
2090     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2091     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2092     x    = (Mat_SeqBAIJ*)xx->B->data;
2093     y    = (Mat_SeqBAIJ*)yy->B->data;
2094     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2095     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2096     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2097   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2098     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2099   } else {
2100     Mat      B;
2101     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2102     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2103     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2104     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2105     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2106     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2107     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2108     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2109     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2110     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2111     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2112     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2113     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2114     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2115     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2116     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
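
/*
   Illustrative use of the routine above: Y = a*X + Y.  The MatStructure
   flag selects the code path (a direct BLAS axpy for SAME_NONZERO_PATTERN,
   otherwise a preallocated merge):

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/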
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2123 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2124 {
2125   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2130   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2136 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2137 {
2138   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2139   PetscErrorCode ierr;
2140 
2141   PetscFunctionBegin;
2142   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2143   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2149 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2150 {
2151   PetscErrorCode ierr;
2152   IS             iscol_local;
2153   PetscInt       csize;
2154 
2155   PetscFunctionBegin;
2156   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2157   if (call == MAT_REUSE_MATRIX) {
2158     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2159     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2160   } else {
2161     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2162   }
2163   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2164   if (call == MAT_INITIAL_MATRIX) {
2165     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2166     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2167   }
2168   PetscFunctionReturn(0);
2169 }
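
/*
   Illustrative use of the routine above through MatGetSubMatrix(); nloc and
   first are hypothetical placeholders, and the index sets hold global point
   (not block) indices aligned to block boundaries:

      IS  isrow,iscol;
      Mat S;
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),nloc,first,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),nloc,first,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/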
2170 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2173 /*
2174   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2175   and then the end result is formed by concatenating the local matrices.
2176   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2177   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2178 */
2179 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2180 {
2181   PetscErrorCode ierr;
2182   PetscMPIInt    rank,size;
2183   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2184   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2185   Mat            M,Mreuse;
2186   MatScalar      *vwork,*aa;
2187   MPI_Comm       comm;
2188   IS             isrow_new, iscol_new;
2189   PetscBool      idflag,allrows, allcols;
2190   Mat_SeqBAIJ    *aij;
2191 
2192   PetscFunctionBegin;
2193   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2196   /* The compression and expansion should be avoided. It does not flag
2197      errors and might change the indices, hence it is buggy. */
2198   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2199   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2200 
2201   /* Check for special case: each processor gets entire matrix columns */
2202   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2203   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2204   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2205   else allcols = PETSC_FALSE;
2206 
2207   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2208   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2209   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2210   else allrows = PETSC_FALSE;
2211 
2212   if (call ==  MAT_REUSE_MATRIX) {
2213     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2214     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2215     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2216   } else {
2217     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2218   }
2219   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2220   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2221   /*
2222       m - number of local rows
2223       n - number of columns (same on all processors)
2224       rstart - first row in new global matrix generated
2225   */
2226   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2227   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2228   m    = m/bs;
2229   n    = n/bs;
2230 
2231   if (call == MAT_INITIAL_MATRIX) {
2232     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2233     ii  = aij->i;
2234     jj  = aij->j;
2235 
2236     /*
2237         Determine the number of non-zeros in the diagonal and off-diagonal
2238         portions of the matrix in order to do correct preallocation
2239     */
2240 
2241     /* first get start and end of "diagonal" columns */
2242     if (csize == PETSC_DECIDE) {
2243       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2244       if (mglobal == n*bs) { /* square matrix */
2245         nlocal = m;
2246       } else {
2247         nlocal = n/size + ((n % size) > rank);
2248       }
2249     } else {
2250       nlocal = csize/bs;
2251     }
2252     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2253     rstart = rend - nlocal;
2254     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2255 
2256     /* next, compute all the lengths */
2257     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2258     for (i=0; i<m; i++) {
2259       jend = ii[i+1] - ii[i];
2260       olen = 0;
2261       dlen = 0;
2262       for (j=0; j<jend; j++) {
2263         if (*jj < rstart || *jj >= rend) olen++;
2264         else dlen++;
2265         jj++;
2266       }
2267       olens[i] = olen;
2268       dlens[i] = dlen;
2269     }
2270     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2271     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2272     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2273     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2274     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2275     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2276   } else {
2277     PetscInt ml,nl;
2278 
2279     M    = *newmat;
2280     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2281     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2282     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2283     /*
2284          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2285        rather than the slower MatSetValues().
2286     */
2287     M->was_assembled = PETSC_TRUE;
2288     M->assembled     = PETSC_FALSE;
2289   }
2290   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2291   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2292   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2293   ii   = aij->i;
2294   jj   = aij->j;
2295   aa   = aij->a;
2296   for (i=0; i<m; i++) {
2297     row   = rstart/bs + i;
2298     nz    = ii[i+1] - ii[i];
2299     cwork = jj;     jj += nz;
2300     vwork = aa;     aa += nz*bs*bs;
2301     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2302   }
2303 
2304   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306   *newmat = M;
2307 
2308   /* save submatrix used in processor for next request */
2309   if (call ==  MAT_INITIAL_MATRIX) {
2310     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2311     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2312   }
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatPermute_MPIBAIJ"
2318 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2319 {
2320   MPI_Comm       comm,pcomm;
2321   PetscInt       clocal_size,nrows;
2322   const PetscInt *rows;
2323   PetscMPIInt    size;
2324   IS             crowp,lcolp;
2325   PetscErrorCode ierr;
2326 
2327   PetscFunctionBegin;
2328   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2329   /* make a collective version of 'rowp' */
2330   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2331   if (pcomm==comm) {
2332     crowp = rowp;
2333   } else {
2334     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2335     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2336     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2337     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2338   }
2339   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2340   /* make a local version of 'colp' */
2341   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2342   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2343   if (size==1) {
2344     lcolp = colp;
2345   } else {
2346     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2347   }
2348   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2349   /* now we just get the submatrix */
2350   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2351   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2352   /* clean up */
2353   if (pcomm!=comm) {
2354     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2355   }
2356   if (size>1) {
2357     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2358   }
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2364 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2365 {
2366   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2367   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2368 
2369   PetscFunctionBegin;
2370   if (nghosts) *nghosts = B->nbs;
2371   if (ghosts) *ghosts = baij->garray;
2372   PetscFunctionReturn(0);
2373 }
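
/*
   Illustrative use of the routine above: retrieve the global block indices
   of the ghost (off-process) columns held by this process; the array is
   owned by the matrix and must not be freed:

      PetscInt       nghosts;
      const PetscInt *ghosts;
      ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);
*/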
2374 
2375 #undef __FUNCT__
2376 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2377 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2378 {
2379   Mat            B;
2380   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2381   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2382   Mat_SeqAIJ     *b;
2383   PetscErrorCode ierr;
2384   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2385   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2386   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2387 
2388   PetscFunctionBegin;
2389   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2390   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2391 
2392   /* ----------------------------------------------------------------
2393      Tell every processor the number of nonzeros per row
2394   */
2395   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2396   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2397     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2398   }
2399   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2400   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2401   displs    = recvcounts + size;
2402   for (i=0; i<size; i++) {
2403     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2404     displs[i]     = A->rmap->range[i]/bs;
2405   }
2406 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2407   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2408 #else
2409   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2410 #endif
2411   /* ---------------------------------------------------------------
2412      Create the sequential matrix of the same type as the local block diagonal
2413   */
2414   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2415   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2416   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2417   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2418   b    = (Mat_SeqAIJ*)B->data;
2419 
2420   /*--------------------------------------------------------------------
2421     Copy my part of matrix column indices over
2422   */
2423   sendcount  = ad->nz + bd->nz;
2424   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2425   a_jsendbuf = ad->j;
2426   b_jsendbuf = bd->j;
2427   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2428   cnt        = 0;
2429   for (i=0; i<n; i++) {
2430 
2431     /* put in lower diagonal portion */
2432     m = bd->i[i+1] - bd->i[i];
2433     while (m > 0) {
2434       /* is it above diagonal (in bd (compressed) numbering) */
2435       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2436       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2437       m--;
2438     }
2439 
2440     /* put in diagonal portion */
2441     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2442       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2443     }
2444 
2445     /* put in upper diagonal portion */
2446     while (m-- > 0) {
2447       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2448     }
2449   }
2450   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2451 
2452   /*--------------------------------------------------------------------
2453     Gather all column indices to all processors
2454   */
2455   for (i=0; i<size; i++) {
2456     recvcounts[i] = 0;
2457     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2458       recvcounts[i] += lens[j];
2459     }
2460   }
2461   displs[0] = 0;
2462   for (i=1; i<size; i++) {
2463     displs[i] = displs[i-1] + recvcounts[i-1];
2464   }
2465 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2466   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2467 #else
2468   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2469 #endif
2470   /*--------------------------------------------------------------------
2471     Assemble the matrix into usable form (note numerical values are not yet set)
2472   */
2473   /* set the b->ilen (length of each row) values */
2474   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2475   /* set the b->i indices */
2476   b->i[0] = 0;
2477   for (i=1; i<=A->rmap->N/bs; i++) {
2478     b->i[i] = b->i[i-1] + lens[i-1];
2479   }
2480   ierr = PetscFree(lens);CHKERRQ(ierr);
2481   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2482   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2483   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2484 
2485   if (A->symmetric) {
2486     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2487   } else if (A->hermitian) {
2488     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2489   } else if (A->structurally_symmetric) {
2490     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2491   }
2492   *newmat = B;
2493   PetscFunctionReturn(0);
2494 }
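
/*
   Note: the routine above returns, on every process, a sequential AIJ matrix
   whose rows and columns are the global block rows/columns of A (dimension
   N/bs by N/bs), holding only the nonzero structure; the numerical values
   are never set.
*/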
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatSOR_MPIBAIJ"
2498 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2499 {
2500   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2501   PetscErrorCode ierr;
2502   Vec            bb1 = 0;
2503 
2504   PetscFunctionBegin;
2505   if (flag == SOR_APPLY_UPPER) {
2506     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2507     PetscFunctionReturn(0);
2508   }
2509 
2510   if (its > 1 || (~flag & SOR_ZERO_INITIAL_GUESS)) {
2511     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2512   }
2513 
2514   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2515     if (flag & SOR_ZERO_INITIAL_GUESS) {
2516       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2517       its--;
2518     }
2519 
2520     while (its--) {
2521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523 
2524       /* update rhs: bb1 = bb - B*x */
2525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2527 
2528       /* local sweep */
2529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2530     }
2531   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2532     if (flag & SOR_ZERO_INITIAL_GUESS) {
2533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2534       its--;
2535     }
2536     while (its--) {
2537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2539 
2540       /* update rhs: bb1 = bb - B*x */
2541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2543 
2544       /* local sweep */
2545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2546     }
2547   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2548     if (flag & SOR_ZERO_INITIAL_GUESS) {
2549       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2550       its--;
2551     }
2552     while (its--) {
2553       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2554       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2555 
2556       /* update rhs: bb1 = bb - B*x */
2557       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2558       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2559 
2560       /* local sweep */
2561       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2562     }
2563   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR version not supported");
2564 
2565   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
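
/*
   The local sweeps above are normally reached through PCSOR, e.g. with the
   (illustrative) command-line options

      -ksp_type richardson -pc_type sor -pc_sor_local_symmetric

   Each outer iteration scatters the ghost values of xx, forms
   bb1 = bb - B*x_ghost, and then runs the requested sweep on the local
   diagonal block.
*/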
2568 
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2571 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2572 {
2573   PetscErrorCode ierr;
2574   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2575   PetscInt       N,i,*garray = aij->garray;
2576   PetscInt       ib,jb,bs = A->rmap->bs;
2577   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2578   MatScalar      *a_val = a_aij->a;
2579   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2580   MatScalar      *b_val = b_aij->a;
2581   PetscReal      *work;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2585   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2586   if (type == NORM_2) {
2587     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2588       for (jb=0; jb<bs; jb++) {
2589         for (ib=0; ib<bs; ib++) {
2590           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2591           a_val++;
2592         }
2593       }
2594     }
2595     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2596       for (jb=0; jb<bs; jb++) {
2597         for (ib=0; ib<bs; ib++) {
2598           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2599           b_val++;
2600         }
2601       }
2602     }
2603   } else if (type == NORM_1) {
2604     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2605       for (jb=0; jb<bs; jb++) {
2606         for (ib=0; ib<bs; ib++) {
2607           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2608           a_val++;
2609         }
2610       }
2611     }
2612     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2613       for (jb=0; jb<bs; jb++) {
2614         for (ib=0; ib<bs; ib++) {
2615           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2616           b_val++;
2617         }
2618       }
2619     }
2620   } else if (type == NORM_INFINITY) {
2621     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2622       for (jb=0; jb<bs; jb++) {
2623         for (ib=0; ib<bs; ib++) {
2624           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2625           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2626           a_val++;
2627         }
2628       }
2629     }
2630     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2631       for (jb=0; jb<bs; jb++) {
2632         for (ib=0; ib<bs; ib++) {
2633           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2634           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2635           b_val++;
2636         }
2637       }
2638     }
2639   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2640   if (type == NORM_INFINITY) {
2641     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2642   } else {
2643     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2644   }
2645   ierr = PetscFree(work);CHKERRQ(ierr);
2646   if (type == NORM_2) {
2647     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2648   }
2649   PetscFunctionReturn(0);
2650 }
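
/*
   Illustrative use of the routine above: compute the infinity-norm of every
   global column (norms must have length equal to the global column count):

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_INFINITY,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/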
2651 
2652 #undef __FUNCT__
2653 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2654 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2655 {
2656   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
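
/*
   Illustrative use of the routine above: values points to the inverses of
   the bs x bs diagonal blocks, bs*bs entries per block row stored
   contiguously; the array is owned by the matrix and must not be freed:

      const PetscScalar *idiag;
      ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
*/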
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatShift_MPIBAIJ"
2666 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2667 {
2668   PetscErrorCode ierr;
2669   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2670   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data,*bij = (Mat_SeqBAIJ*)maij->B->data;
2671 
2672   PetscFunctionBegin;
2673   if (!aij->nz && !bij->nz) {
2674     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2675   }
2676   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 /* -------------------------------------------------------------------*/
2681 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2682                                        MatGetRow_MPIBAIJ,
2683                                        MatRestoreRow_MPIBAIJ,
2684                                        MatMult_MPIBAIJ,
2685                                 /* 4*/ MatMultAdd_MPIBAIJ,
2686                                        MatMultTranspose_MPIBAIJ,
2687                                        MatMultTransposeAdd_MPIBAIJ,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                 /*10*/ 0,
2692                                        0,
2693                                        0,
2694                                        MatSOR_MPIBAIJ,
2695                                        MatTranspose_MPIBAIJ,
2696                                 /*15*/ MatGetInfo_MPIBAIJ,
2697                                        MatEqual_MPIBAIJ,
2698                                        MatGetDiagonal_MPIBAIJ,
2699                                        MatDiagonalScale_MPIBAIJ,
2700                                        MatNorm_MPIBAIJ,
2701                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2702                                        MatAssemblyEnd_MPIBAIJ,
2703                                        MatSetOption_MPIBAIJ,
2704                                        MatZeroEntries_MPIBAIJ,
2705                                 /*24*/ MatZeroRows_MPIBAIJ,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                 /*29*/ MatSetUp_MPIBAIJ,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*34*/ MatDuplicate_MPIBAIJ,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*39*/ MatAXPY_MPIBAIJ,
2721                                        MatGetSubMatrices_MPIBAIJ,
2722                                        MatIncreaseOverlap_MPIBAIJ,
2723                                        MatGetValues_MPIBAIJ,
2724                                        MatCopy_MPIBAIJ,
2725                                 /*44*/ 0,
2726                                        MatScale_MPIBAIJ,
2727                                        MatShift_MPIBAIJ,
2728                                        0,
2729                                        MatZeroRowsColumns_MPIBAIJ,
2730                                 /*49*/ 0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2736                                        0,
2737                                        MatSetUnfactored_MPIBAIJ,
2738                                        MatPermute_MPIBAIJ,
2739                                        MatSetValuesBlocked_MPIBAIJ,
2740                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2741                                        MatDestroy_MPIBAIJ,
2742                                        MatView_MPIBAIJ,
2743                                        0,
2744                                        0,
2745                                 /*64*/ 0,
2746                                        0,
2747                                        0,
2748                                        0,
2749                                        0,
2750                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                        0,
2755                                 /*74*/ 0,
2756                                        MatFDColoringApply_BAIJ,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                 /*79*/ 0,
2761                                        0,
2762                                        0,
2763                                        0,
2764                                        MatLoad_MPIBAIJ,
2765                                 /*84*/ 0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                        0,
2770                                 /*89*/ 0,
2771                                        0,
2772                                        0,
2773                                        0,
2774                                        0,
2775                                 /*94*/ 0,
2776                                        0,
2777                                        0,
2778                                        0,
2779                                        0,
2780                                 /*99*/ 0,
2781                                        0,
2782                                        0,
2783                                        0,
2784                                        0,
2785                                 /*104*/0,
2786                                        MatRealPart_MPIBAIJ,
2787                                        MatImaginaryPart_MPIBAIJ,
2788                                        0,
2789                                        0,
2790                                 /*109*/0,
2791                                        0,
2792                                        0,
2793                                        0,
2794                                        0,
2795                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2796                                        0,
2797                                        MatGetGhosts_MPIBAIJ,
2798                                        0,
2799                                        0,
2800                                 /*119*/0,
2801                                        0,
2802                                        0,
2803                                        0,
2804                                        MatGetMultiProcBlock_MPIBAIJ,
2805                                 /*124*/0,
2806                                        MatGetColumnNorms_MPIBAIJ,
2807                                        MatInvertBlockDiagonal_MPIBAIJ,
2808                                        0,
2809                                        0,
2810                                /*129*/ 0,
2811                                        0,
2812                                        0,
2813                                        0,
2814                                        0,
2815                                /*134*/ 0,
2816                                        0,
2817                                        0,
2818                                        0,
2819                                        0,
2820                                /*139*/ 0,
2821                                        0,
2822                                        0,
2823                                        MatFDColoringSetUp_MPIXAIJ,
2824                                        0,
2825                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2826 };
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2830 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2831 {
2832   PetscFunctionBegin;
2833   *a = ((Mat_MPIBAIJ*)A->data)->A;
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2838 
2839 #undef __FUNCT__
2840 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2841 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2842 {
2843   PetscInt       m,rstart,cstart,cend;
2844   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2845   const PetscInt *JJ    =0;
2846   PetscScalar    *values=0;
2847   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2848   PetscErrorCode ierr;
2849 
2850   PetscFunctionBegin;
2851   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2852   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2853   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2854   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2855   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2856   m      = B->rmap->n/bs;
2857   rstart = B->rmap->rstart/bs;
2858   cstart = B->cmap->rstart/bs;
2859   cend   = B->cmap->rend/bs;
2860 
2861   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2862   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
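  /* count, for each local block row, how many block columns fall inside [cstart,cend)
     (the diagonal part) versus outside it (the off-diagonal part); this relies on the
     column indices jj being sorted within each row */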
2863   for (i=0; i<m; i++) {
2864     nz = ii[i+1] - ii[i];
2865     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2866     nz_max = PetscMax(nz_max,nz);
2867     JJ     = jj + ii[i];
2868     for (j=0; j<nz; j++) {
2869       if (*JJ >= cstart) break;
2870       JJ++;
2871     }
2872     d = 0;
2873     for (; j<nz; j++) {
2874       if (*JJ++ >= cend) break;
2875       d++;
2876     }
2877     d_nnz[i] = d;
2878     o_nnz[i] = nz - d;
2879   }
2880   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2881   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2882 
2883   values = (PetscScalar*)V;
2884   if (!values) {
2885     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2886     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2887   }
2888   for (i=0; i<m; i++) {
2889     PetscInt          row    = i + rstart;
2890     PetscInt          ncols  = ii[i+1] - ii[i];
2891     const PetscInt    *icols = jj + ii[i];
2892     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2893       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2894       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2895     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2896       PetscInt j;
2897       for (j=0; j<ncols; j++) {
2898         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2899         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2900       }
2901     }
2902   }
2903 
2904   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2905   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2913 /*@C
2914    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2915    (the default parallel PETSc format).
2916 
2917    Collective on MPI_Comm
2918 
2919    Input Parameters:
2920 +  B - the matrix
2921 .  bs - the block size
2922 .  i - the indices into j for the start of each local row (starts with zero)
2923 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2924 -  v - optional values in the matrix
2925 
2926    Level: developer
2927 
2928    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2929    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2930    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2931    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2932    block column and the second index is over columns within a block.
2933 
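   A minimal sketch of typical use, run on a single MPI process for simplicity (the sizes,
   CSR arrays, and values below are purely illustrative):

.vb
   Mat         B;
   PetscInt    bs  = 2;
   PetscInt    i[] = {0,1,2};               /* two local block rows, one block each */
   PetscInt    j[] = {0,1};                 /* global block column of each block    */
   PetscScalar v[] = {1,2,3,4, 5,6,7,8};    /* bs*bs values per block               */

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,i,j,v);
.ve

   Note that this routine also assembles the matrix, so no further calls to
   MatAssemblyBegin()/MatAssemblyEnd() are needed.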
2934 .keywords: matrix, aij, compressed row, sparse, parallel
2935 
2936 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2937 @*/
2938 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2939 {
2940   PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2944   PetscValidType(B,1);
2945   PetscValidLogicalCollectiveInt(B,bs,2);
2946   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2952 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2953 {
2954   Mat_MPIBAIJ    *b;
2955   PetscErrorCode ierr;
2956   PetscInt       i;
2957 
2958   PetscFunctionBegin;
2959   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2961   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2962   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2963 
2964   if (d_nnz) {
2965     for (i=0; i<B->rmap->n/bs; i++) {
2966       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2967     }
2968   }
2969   if (o_nnz) {
2970     for (i=0; i<B->rmap->n/bs; i++) {
2971       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2972     }
2973   }
2974 
2975   b      = (Mat_MPIBAIJ*)B->data;
2976   b->bs2 = bs*bs;
2977   b->mbs = B->rmap->n/bs;
2978   b->nbs = B->cmap->n/bs;
2979   b->Mbs = B->rmap->N/bs;
2980   b->Nbs = B->cmap->N/bs;
2981 
2982   for (i=0; i<=b->size; i++) {
2983     b->rangebs[i] = B->rmap->range[i]/bs;
2984   }
2985   b->rstartbs = B->rmap->rstart/bs;
2986   b->rendbs   = B->rmap->rend/bs;
2987   b->cstartbs = B->cmap->rstart/bs;
2988   b->cendbs   = B->cmap->rend/bs;
2989 
2990   if (!B->preallocated) {
2991     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2992     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2993     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2994     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2995     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2996     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2997     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2998     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2999     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3000   }
3001 
3002   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3003   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3004   B->preallocated = PETSC_TRUE;
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3009 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3013 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3014 {
3015   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3016   PetscErrorCode ierr;
3017   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3018   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3019   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3020 
3021   PetscFunctionBegin;
3022   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
3023   ii[0] = 0;
3024   for (i=0; i<M; i++) {
3025     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3026     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3027     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3028     /* remove one from the count if the row has a diagonal entry */
3029     for (j=id[i]; j<id[i+1]; j++) {
3030       if (jd[j] == i) {ii[i+1]--;break;}
3031     }
3032   }
3033   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
3034   cnt  = 0;
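  /* fill jj one block row at a time: first the off-process block columns below rstart,
     then the on-process (diagonal block) columns with the diagonal entry itself skipped,
     then the remaining off-process block columns */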
3035   for (i=0; i<M; i++) {
3036     for (j=io[i]; j<io[i+1]; j++) {
3037       if (garray[jo[j]] > rstart) break;
3038       jj[cnt++] = garray[jo[j]];
3039     }
3040     for (k=id[i]; k<id[i+1]; k++) {
3041       if (jd[k] != i) {
3042         jj[cnt++] = rstart + jd[k];
3043       }
3044     }
3045     for (; j<io[i+1]; j++) {
3046       jj[cnt++] = garray[jo[j]];
3047     }
3048   }
3049   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3054 
3055 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3056 
3057 #undef __FUNCT__
3058 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3059 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3060 {
3061   PetscErrorCode ierr;
3062   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3063   Mat            B;
3064   Mat_MPIAIJ     *b;
3065 
3066   PetscFunctionBegin;
3067   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3068 
3069   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3070   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3071   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3072   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3073   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3074   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3075   b    = (Mat_MPIAIJ*) B->data;
3076 
3077   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3078   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3079   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3080   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3081   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3084   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3085   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3086   if (reuse == MAT_REUSE_MATRIX) {
3087     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3088   } else {
3089    *newmat = B;
3090   }
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 /*MC
3095    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3096 
3097    Options Database Keys:
3098 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3099 . -mat_block_size <bs> - set the blocksize used to store the matrix
3100 - -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> is the factor used to size the table
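
  For example (an illustrative command line; the executable name is hypothetical):
.vb
   mpiexec -n 4 ./app -mat_type mpibaij -mat_block_size 3
.ve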
3101 
3102   Level: beginner
3103 
3104 .seealso: MatCreateBAIJ()
3105 M*/
3106 
3107 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3108 
3109 #undef __FUNCT__
3110 #define __FUNCT__ "MatCreate_MPIBAIJ"
3111 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3112 {
3113   Mat_MPIBAIJ    *b;
3114   PetscErrorCode ierr;
3115   PetscBool      flg = PETSC_FALSE;
3116 
3117   PetscFunctionBegin;
3118   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3119   B->data = (void*)b;
3120 
3121   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3122   B->assembled = PETSC_FALSE;
3123 
3124   B->insertmode = NOT_SET_VALUES;
3125   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3126   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3127 
3128   /* build local table of row and column ownerships */
3129   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3130 
3131   /* build cache for off array entries formed */
3132   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3133 
3134   b->donotstash  = PETSC_FALSE;
3135   b->colmap      = NULL;
3136   b->garray      = NULL;
3137   b->roworiented = PETSC_TRUE;
3138 
3139   /* stuff used in block assembly */
3140   b->barray = 0;
3141 
3142   /* stuff used for matrix vector multiply */
3143   b->lvec  = 0;
3144   b->Mvctx = 0;
3145 
3146   /* stuff for MatGetRow() */
3147   b->rowindices   = 0;
3148   b->rowvalues    = 0;
3149   b->getrowactive = PETSC_FALSE;
3150 
3151   /* hash table stuff */
3152   b->ht           = 0;
3153   b->hd           = 0;
3154   b->ht_size      = 0;
3155   b->ht_flag      = PETSC_FALSE;
3156   b->ht_fact      = 0;
3157   b->ht_total_ct  = 0;
3158   b->ht_insert_ct = 0;
3159 
3160   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3161   b->ijonly = PETSC_FALSE;
3162 
3163 
3164   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3165   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3166   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3167   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3168   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3169   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3170   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3172   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3173   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3175   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3176 
3177   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3178   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
3179   if (flg) {
3180     PetscReal fact = 1.39;
3181     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3182     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3183     if (fact <= 1.0) fact = 1.39;
3184     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3185     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3186   }
3187   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 /*MC
3192    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3193 
3194    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3195    and MATMPIBAIJ otherwise.
3196 
3197    Options Database Keys:
3198 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3199 
3200   Level: beginner
3201 
3202 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3203 M*/
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3207 /*@C
3208    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3209    (block compressed row).  For good matrix assembly performance
3210    the user should preallocate the matrix storage by setting the parameters
3211    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3212    performance can be increased by more than a factor of 50.
3213 
3214    Collective on Mat
3215 
3216    Input Parameters:
3217 +  B - the matrix
3218 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3219           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3220 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3221            submatrix  (same for all local rows)
3222 .  d_nnz - array containing the number of block nonzeros in the various block rows
3223            of the diagonal portion of the local submatrix (possibly different for each block
3224            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3225            set it even if it is zero.
3226 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3227            submatrix (same for all local rows).
3228 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3229            off-diagonal portion of the local submatrix (possibly different for
3230            each block row) or NULL.
3231 
3232    If the *_nnz parameter is given then the *_nz parameter is ignored
3233 
3234    Options Database Keys:
3235 +   -mat_block_size - size of the blocks to use
3236 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> is the factor used to size the table
3237 
3238    Notes:
3239    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3240    then it must be used on all processors that share the object for that argument.
3241 
3242    Storage Information:
3243    For a square global matrix we define each processor's diagonal portion
3244    to be its local rows and the corresponding columns (a square submatrix);
3245    each processor's off-diagonal portion encompasses the remainder of the
3246    local matrix (a rectangular submatrix).
3247 
3248    The user can specify preallocated storage for the diagonal part of
3249    the local submatrix with either d_nz or d_nnz (not both).  Set
3250    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3251    memory allocation.  Likewise, specify preallocated storage for the
3252    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3253 
3254    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3255    the figure below we depict these three local rows and all columns (0-11).
3256 
3257 .vb
3258            0 1 2 3 4 5 6 7 8 9 10 11
3259           --------------------------
3260    row 3  |o o o d d d o o o o  o  o
3261    row 4  |o o o d d d o o o o  o  o
3262    row 5  |o o o d d d o o o o  o  o
3263           --------------------------
3264 .ve
3265 
3266    Thus, any entries in the d locations are stored in the d (diagonal)
3267    submatrix, and any entries in the o locations are stored in the
3268    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3269    stored simply in the MATSEQBAIJ format for compressed row storage.
3270 
3271    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3272    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3273    In general, for PDE problems in which most nonzeros are near the diagonal,
3274    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3275    or you will get TERRIBLE performance; see the users' manual chapter on
3276    matrices.
3277 
3278    You can call MatGetInfo() to get information on how effective the preallocation was;
3279    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3280    You can also run with the option -info and look for messages with the string
3281    malloc in them to see if additional memory allocation was needed.
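
   As an illustrative sketch (the block-row counts here are made up), a process owning two
   block rows with at most 2 diagonal-part blocks and 1 off-diagonal-part block per block
   row could preallocate with
.vb
   PetscInt d_nnz[] = {2,2}, o_nnz[] = {1,1};
   MatCreate(comm,&A);
   MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve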
3282 
3283    Level: intermediate
3284 
3285 .keywords: matrix, block, aij, compressed row, sparse, parallel
3286 
3287 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3288 @*/
3289 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3290 {
3291   PetscErrorCode ierr;
3292 
3293   PetscFunctionBegin;
3294   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3295   PetscValidType(B,1);
3296   PetscValidLogicalCollectiveInt(B,bs,2);
3297   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3298   PetscFunctionReturn(0);
3299 }
3300 
3301 #undef __FUNCT__
3302 #define __FUNCT__ "MatCreateBAIJ"
3303 /*@C
3304    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3305    (block compressed row).  For good matrix assembly performance
3306    the user should preallocate the matrix storage by setting the parameters
3307    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3308    performance can be increased by more than a factor of 50.
3309 
3310    Collective on MPI_Comm
3311 
3312    Input Parameters:
3313 +  comm - MPI communicator
3314 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3315           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3316 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3317            This value should be the same as the local size used in creating the
3318            y vector for the matrix-vector product y = Ax.
3319 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3320            This value should be the same as the local size used in creating the
3321            x vector for the matrix-vector product y = Ax.
3322 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3323 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3324 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3325            submatrix  (same for all local rows)
3326 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3327            of the diagonal portion of the local submatrix (possibly different for each block
3328            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3329            and set it even if it is zero.
3330 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3331            submatrix (same for all local rows).
3332 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3333            off-diagonal portion of the local submatrix (possibly different for
3334            each block row) or NULL.
3335 
3336    Output Parameter:
3337 .  A - the matrix
3338 
3339    Options Database Keys:
3340 +   -mat_block_size - size of the blocks to use
3341 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> is the factor used to size the table
3342 
3343    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3344    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3345    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3346 
3347    Notes:
3348    If the *_nnz parameter is given then the *_nz parameter is ignored
3349 
3350    A nonzero block is any block that has 1 or more nonzeros in it
3351 
3352    The user MUST specify either the local or global matrix dimensions
3353    (possibly both).
3354 
3355    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3356    then it must be used on all processors that share the object for that argument.
3357 
3358    Storage Information:
3359    For a square global matrix we define each processor's diagonal portion
3360    to be its local rows and the corresponding columns (a square submatrix);
3361    each processor's off-diagonal portion encompasses the remainder of the
3362    local matrix (a rectangular submatrix).
3363 
3364    The user can specify preallocated storage for the diagonal part of
3365    the local submatrix with either d_nz or d_nnz (not both).  Set
3366    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3367    memory allocation.  Likewise, specify preallocated storage for the
3368    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3369 
3370    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3371    the figure below we depict these three local rows and all columns (0-11).
3372 
3373 .vb
3374            0 1 2 3 4 5 6 7 8 9 10 11
3375           --------------------------
3376    row 3  |o o o d d d o o o o  o  o
3377    row 4  |o o o d d d o o o o  o  o
3378    row 5  |o o o d d d o o o o  o  o
3379           --------------------------
3380 .ve
3381 
3382    Thus, any entries in the d locations are stored in the d (diagonal)
3383    submatrix, and any entries in the o locations are stored in the
3384    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3385    stored simply in the MATSEQBAIJ format for compressed row storage.
3386 
3387    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3388    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3389    In general, for PDE problems in which most nonzeros are near the diagonal,
3390    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3391    or you will get TERRIBLE performance; see the users' manual chapter on
3392    matrices.
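
   As an illustrative sketch (the per-row estimates 3 and 2 are made up), a matrix with at
   most 3 diagonal-part and 2 off-diagonal-part block nonzeros per block row could be
   created with
.vb
   Mat A;
   MatCreateBAIJ(comm,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,3,NULL,2,NULL,&A);
.ve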
3393 
3394    Level: intermediate
3395 
3396 .keywords: matrix, block, aij, compressed row, sparse, parallel
3397 
3398 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3399 @*/
3400 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3401 {
3402   PetscErrorCode ierr;
3403   PetscMPIInt    size;
3404 
3405   PetscFunctionBegin;
3406   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3407   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3408   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3409   if (size > 1) {
3410     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3411     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3412   } else {
3413     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3414     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3415   }
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 #undef __FUNCT__
3420 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3421 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3422 {
3423   Mat            mat;
3424   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3425   PetscErrorCode ierr;
3426   PetscInt       len=0;
3427 
3428   PetscFunctionBegin;
3429   *newmat = 0;
3430   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3431   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3432   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3433   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3434 
3435   mat->factortype   = matin->factortype;
3436   mat->preallocated = PETSC_TRUE;
3437   mat->assembled    = PETSC_TRUE;
3438   mat->insertmode   = NOT_SET_VALUES;
3439 
3440   a             = (Mat_MPIBAIJ*)mat->data;
3441   mat->rmap->bs = matin->rmap->bs;
3442   a->bs2        = oldmat->bs2;
3443   a->mbs        = oldmat->mbs;
3444   a->nbs        = oldmat->nbs;
3445   a->Mbs        = oldmat->Mbs;
3446   a->Nbs        = oldmat->Nbs;
3447 
3448   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3449   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3450 
3451   a->size         = oldmat->size;
3452   a->rank         = oldmat->rank;
3453   a->donotstash   = oldmat->donotstash;
3454   a->roworiented  = oldmat->roworiented;
3455   a->rowindices   = 0;
3456   a->rowvalues    = 0;
3457   a->getrowactive = PETSC_FALSE;
3458   a->barray       = 0;
3459   a->rstartbs     = oldmat->rstartbs;
3460   a->rendbs       = oldmat->rendbs;
3461   a->cstartbs     = oldmat->cstartbs;
3462   a->cendbs       = oldmat->cendbs;
3463 
3464   /* hash table stuff */
3465   a->ht           = 0;
3466   a->hd           = 0;
3467   a->ht_size      = 0;
3468   a->ht_flag      = oldmat->ht_flag;
3469   a->ht_fact      = oldmat->ht_fact;
3470   a->ht_total_ct  = 0;
3471   a->ht_insert_ct = 0;
3472 
3473   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3474   if (oldmat->colmap) {
3475 #if defined(PETSC_USE_CTABLE)
3476     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3477 #else
3478     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3479     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3480     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3481 #endif
3482   } else a->colmap = 0;
3483 
3484   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3485     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3486     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3487     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3488   } else a->garray = 0;
3489 
3490   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3491   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3492   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3493   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3494   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3495 
3496   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3497   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3498   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3499   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3500   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3501   *newmat = mat;
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 #undef __FUNCT__
3506 #define __FUNCT__ "MatLoad_MPIBAIJ"
3507 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3508 {
3509   PetscErrorCode ierr;
3510   int            fd;
3511   PetscInt       i,nz,j,rstart,rend;
3512   PetscScalar    *vals,*buf;
3513   MPI_Comm       comm;
3514   MPI_Status     status;
3515   PetscMPIInt    rank,size,maxnz;
3516   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3517   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3518   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3519   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3520   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3521   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3522 
3523   PetscFunctionBegin;
3524   /* force binary viewer to load .info file if it has not yet done so */
3525   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3526   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3527   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3528   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3529   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3530   if (bs < 0) bs = 1;
3531 
3532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3533   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3534   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3535   if (!rank) {
3536     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3537     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3538   }
3539   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3540   M    = header[1]; N = header[2];
3541 
3542   /* If global sizes are set, check if they are consistent with that given in the file */
3543   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3544   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3545 
3546   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3547 
3548   /*
3549      This code adds extra rows to make sure the number of rows is
3550      divisible by the blocksize
3551   */
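  /* for example, M = 10 with bs = 3 gives Mbs = 4 and extra_rows = 2; the padded rows
     receive a 1.0 on the diagonal when the values are read in below */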
3552   Mbs        = M/bs;
3553   extra_rows = bs - M + bs*Mbs;
3554   if (extra_rows == bs) extra_rows = 0;
3555   else                  Mbs++;
3556   if (extra_rows && !rank) {
3557     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3558   }
3559 
3560   /* determine ownership of all rows */
3561   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3562     mbs = Mbs/size + ((Mbs % size) > rank);
3563     m   = mbs*bs;
3564   } else { /* User set */
3565     m   = newmat->rmap->n;
3566     mbs = m/bs;
3567   }
3568   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3569   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3570 
3571   /* process 0 needs enough room for process with most rows */
3572   if (!rank) {
3573     mmax = rowners[1];
3574     for (i=2; i<=size; i++) {
3575       mmax = PetscMax(mmax,rowners[i]);
3576     }
3577     mmax*=bs;
3578   } else mmax = -1;             /* unused, but compiler warns anyway */
3579 
3580   rowners[0] = 0;
3581   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3582   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3583   rstart = rowners[rank];
3584   rend   = rowners[rank+1];
3585 
3586   /* distribute row lengths to all processors */
3587   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3588   if (!rank) {
3589     mend = m;
3590     if (size == 1) mend = mend - extra_rows;
3591     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3592     for (j=mend; j<m; j++) locrowlens[j] = 1;
3593     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3594     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3595     for (j=0; j<m; j++) {
3596       procsnz[0] += locrowlens[j];
3597     }
3598     for (i=1; i<size; i++) {
3599       mend = browners[i+1] - browners[i];
3600       if (i == size-1) mend = mend - extra_rows;
3601       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3602       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3603       /* calculate the number of nonzeros on each processor */
3604       for (j=0; j<browners[i+1]-browners[i]; j++) {
3605         procsnz[i] += rowlengths[j];
3606       }
3607       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3608     }
3609     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3610   } else {
3611     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3612   }
3613 
3614   if (!rank) {
3615     /* determine max buffer needed and allocate it */
3616     maxnz = procsnz[0];
3617     for (i=1; i<size; i++) {
3618       maxnz = PetscMax(maxnz,procsnz[i]);
3619     }
3620     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3621 
3622     /* read in my part of the matrix column indices  */
3623     nz     = procsnz[0];
3624     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3625     mycols = ibuf;
3626     if (size == 1) nz -= extra_rows;
3627     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3628     if (size == 1) {
3629       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3630     }
3631 
3632     /* read in the column indices for the other processes (except the last) and ship them off */
3633     for (i=1; i<size-1; i++) {
3634       nz   = procsnz[i];
3635       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3636       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3637     }
3638     /* read in the stuff for the last proc */
3639     if (size != 1) {
3640       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3641       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3642       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3643       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3644     }
3645     ierr = PetscFree(cols);CHKERRQ(ierr);
3646   } else {
3647     /* determine buffer space needed for message */
3648     nz = 0;
3649     for (i=0; i<m; i++) {
3650       nz += locrowlens[i];
3651     }
3652     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3653     mycols = ibuf;
3654     /* receive the message of column indices */
3655     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3656     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3657     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3658   }
3659 
3660   /* loop over local rows, determining number of off diagonal entries */
3661   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3662   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3663   rowcount = 0; nzcount = 0;
3664   for (i=0; i<mbs; i++) {
3665     dcount  = 0;
3666     odcount = 0;
3667     for (j=0; j<bs; j++) {
3668       kmax = locrowlens[rowcount];
3669       for (k=0; k<kmax; k++) {
3670         tmp = mycols[nzcount++]/bs;
3671         if (!mask[tmp]) {
3672           mask[tmp] = 1;
3673           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3674           else masked1[dcount++] = tmp;
3675         }
3676       }
3677       rowcount++;
3678     }
3679 
3680     dlens[i]  = dcount;
3681     odlens[i] = odcount;
3682 
3683     /* zero out the mask elements we set */
3684     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3685     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3686   }
3687 
3688   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3689   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3690 
3691   if (!rank) {
3692     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3693     /* read in my part of the matrix numerical values  */
3694     nz     = procsnz[0];
3695     vals   = buf;
3696     mycols = ibuf;
3697     if (size == 1) nz -= extra_rows;
3698     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3699     if (size == 1) {
3700       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3701     }
3702 
3703     /* insert into matrix */
3704     jj = rstart*bs;
3705     for (i=0; i<m; i++) {
3706       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3707       mycols += locrowlens[i];
3708       vals   += locrowlens[i];
3709       jj++;
3710     }
3711     /* read in the values for the other processes (except the last one) and ship them out */
3712     for (i=1; i<size-1; i++) {
3713       nz   = procsnz[i];
3714       vals = buf;
3715       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3716       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3717     }
3718     /* the last proc */
3719     if (size != 1) {
3720       nz   = procsnz[i] - extra_rows;
3721       vals = buf;
3722       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3723       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3724       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3725     }
3726     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3727   } else {
3728     /* receive numeric values */
3729     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3730 
3731     /* receive the message of values */
3732     vals   = buf;
3733     mycols = ibuf;
3734     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3735 
3736     /* insert into matrix */
3737     jj = rstart*bs;
3738     for (i=0; i<m; i++) {
3739       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3740       mycols += locrowlens[i];
3741       vals   += locrowlens[i];
3742       jj++;
3743     }
3744   }
3745   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3746   ierr = PetscFree(buf);CHKERRQ(ierr);
3747   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3748   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3749   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3750   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3751   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3758 /*@
3759    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3760 
3761    Input Parameters:
3762 +  mat  - the matrix
3763 -  fact - factor
3764 
3765    Not Collective, each process can use a different factor
3766 
3767    Level: advanced
3768 
3769   Notes:
3770    This can also be set by the command line option: -mat_use_hash_table <fact>
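
   A sketch of equivalent code (A is assumed to already be a MATMPIBAIJ matrix):
.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.39);
.ve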
3771 
3772 .keywords: matrix, hashtable, factor, HT
3773 
3774 .seealso: MatSetOption()
3775 @*/
3776 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3777 {
3778   PetscErrorCode ierr;
3779 
3780   PetscFunctionBegin;
3781   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3787 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3788 {
3789   Mat_MPIBAIJ *baij;
3790 
3791   PetscFunctionBegin;
3792   baij          = (Mat_MPIBAIJ*)mat->data;
3793   baij->ht_fact = fact;
3794   PetscFunctionReturn(0);
3795 }
3796 
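/*
   MatMPIBAIJGetSeqBAIJ - gives access to the local diagonal (Ad) and off-diagonal (Ao)
   SeqBAIJ pieces of an MPIBAIJ matrix; colmap maps the block columns of Ao to global
   block columns. Any of the output arguments may be NULL.
*/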
3797 #undef __FUNCT__
3798 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3799 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3800 {
3801   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3802 
3803   PetscFunctionBegin;
3804   if (Ad)     *Ad     = a->A;
3805   if (Ao)     *Ao     = a->B;
3806   if (colmap) *colmap = a->garray;
3807   PetscFunctionReturn(0);
3808 }
3809 
3810 /*
3811     Special version for direct calls from Fortran (to eliminate two function call overheads)
3812 */
3813 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3814 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3815 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3816 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3817 #endif
3818 
3819 #undef __FUNCT__
3820 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3821 /*@C
3822   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3823 
3824   Collective on Mat
3825 
3826   Input Parameters:
3827 + mat - the matrix
3828 . min - number of input rows
3829 . im - input rows
3830 . nin - number of input columns
3831 . in - input columns
3832 . v - numerical values input
3833 - addvin - INSERT_VALUES or ADD_VALUES
3834 
3835   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3836 
3837   Level: advanced
3838 
3839 .seealso:   MatSetValuesBlocked()
3840 @*/
3841 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3842 {
3843   /* convert input arguments to C version */
3844   Mat        mat  = *matin;
3845   PetscInt   m    = *min, n = *nin;
3846   InsertMode addv = *addvin;
3847 
3848   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3849   const MatScalar *value;
3850   MatScalar       *barray     = baij->barray;
3851   PetscBool       roworiented = baij->roworiented;
3852   PetscErrorCode  ierr;
3853   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3854   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3855   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3856 
3857   PetscFunctionBegin;
3858   /* tasks normally handled by MatSetValuesBlocked() */
3859   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3860 #if defined(PETSC_USE_DEBUG)
3861   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3862   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3863 #endif
3864   if (mat->assembled) {
3865     mat->was_assembled = PETSC_TRUE;
3866     mat->assembled     = PETSC_FALSE;
3867   }
3868   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3869 
3870 
3871   if (!barray) {
3872     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3873     baij->barray = barray;
3874   }
3875 
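  /* stepval is the number of scalars to skip in v to move between consecutive rows
     (row oriented) or consecutive columns (column oriented) of the same block */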
3876   if (roworiented) stepval = (n-1)*bs;
3877   else stepval = (m-1)*bs;
3878 
3879   for (i=0; i<m; i++) {
3880     if (im[i] < 0) continue;
3881 #if defined(PETSC_USE_DEBUG)
3882     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3883 #endif
3884     if (im[i] >= rstart && im[i] < rend) {
3885       row = im[i] - rstart;
3886       for (j=0; j<n; j++) {
3887         /* if only one block column (row oriented) or one block row (column oriented) is passed, no copy is required */
3888         if ((roworiented) && (n == 1)) {
3889           barray = (MatScalar*)v + i*bs2;
3890         } else if ((!roworiented) && (m == 1)) {
3891           barray = (MatScalar*)v + j*bs2;
3892         } else { /* Here a copy is required */
3893           if (roworiented) {
3894             value = v + i*(stepval+bs)*bs + j*bs;
3895           } else {
3896             value = v + j*(stepval+bs)*bs + i*bs;
3897           }
3898           for (ii=0; ii<bs; ii++,value+=stepval) {
3899             for (jj=0; jj<bs; jj++) {
3900               *barray++ = *value++;
3901             }
3902           }
3903           barray -=bs2;
3904         }
3905 
3906         if (in[j] >= cstart && in[j] < cend) {
3907           col  = in[j] - cstart;
3908           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3909         } else if (in[j] < 0) continue;
3910 #if defined(PETSC_USE_DEBUG)
3911         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3912 #endif
3913         else {
3914           if (mat->was_assembled) {
3915             if (!baij->colmap) {
3916               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3917             }
3918 
3919 #if defined(PETSC_USE_DEBUG)
3920 #if defined(PETSC_USE_CTABLE)
3921             { PetscInt data;
3922               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3923               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3924             }
3925 #else
3926             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3927 #endif
3928 #endif
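            /* colmap maps a global block column to (local block column in B)*bs + 1, so a
               lookup result of 0 marks a block column that is not yet present in B */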
3929 #if defined(PETSC_USE_CTABLE)
3930             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3931             col  = (col < 1) ? -1 : (col - 1)/bs; /* col = 0 means not found; (0-1)/bs would truncate to 0 for bs > 1 */
3932 #else
3933             col  = (baij->colmap[in[j]] < 1) ? -1 : (baij->colmap[in[j]] - 1)/bs;
3934 #endif
3935             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
3936               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3937               col  =  in[j];
3938             }
3939           } else col = in[j];
3940           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3941         }
3942       }
3943     } else {
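      /* this block row is owned by another process; stash the values so they can be sent
         to the owning process during MatAssemblyBegin()/MatAssemblyEnd() */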
3944       if (!baij->donotstash) {
3945         if (roworiented) {
3946           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3947         } else {
3948           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3949         }
3950       }
3951     }
3952   }
3953 
3954   /* task normally handled by MatSetValuesBlocked() */
3955   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3956   PetscFunctionReturn(0);
3957 }
3958 
3959 #undef __FUNCT__
3960 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3961 /*@
3962      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
3963          rows in standard CSR format.
3964 
3965    Collective on MPI_Comm
3966 
3967    Input Parameters:
3968 +  comm - MPI communicator
3969 .  bs - the block size, only a block size of 1 is supported
3970 .  m - number of local rows (Cannot be PETSC_DECIDE)
3971 .  n - This value should be the same as the local size used in creating the
3972        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3973        calculated if N is given). For square matrices n is almost always m.
3974 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3975 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3976 .   i - row indices
3977 .   j - column indices
3978 -   a - matrix values
3979 
3980    Output Parameter:
3981 .   mat - the matrix
3982 
3983    Level: intermediate
3984 
3985    Notes:
3986        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3987      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3988      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3989 
3990      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3991      the same as a three-dimensional Fortran array values(bs,bs,nnz) that contains the first column of the first
3992      block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3993      with column-major ordering within blocks.
3994 
3995        The i and j indices are 0 based, and the i entries are offsets into the local j array.
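
       For example, a minimal sketch (illustrative values) for a 2 by 2 matrix with bs = 1,
     assuming a single process owns both rows:
.vb
       PetscInt    i[] = {0,2,3};        /* row offsets: row 0 holds 2 entries, row 1 holds 1 */
       PetscInt    j[] = {0,1,1};        /* column indices */
       PetscScalar a[] = {1.0,2.0,3.0};  /* values */
       Mat         A;
       ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve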
3996 
3997 .keywords: matrix, baij, block, compressed row, sparse, parallel
3998 
3999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4000           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4001 @*/
4002 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4003 {
4004   PetscErrorCode ierr;
4005 
4006   PetscFunctionBegin;
4007   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4008   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4009   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4010   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4011   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4012   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4013   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4014   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
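/*
   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ - stacks the sequential BAIJ matrices inmat supplied by
   the processes of comm on top of one another, in rank order, to form a single parallel BAIJ matrix;
   n is the number of local block columns of the result (or PETSC_DECIDE).
*/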
4018 #undef __FUNCT__
4019 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
4020 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4021 {
4022   PetscErrorCode ierr;
4023   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
4024   PetscInt       *indx;
4025   PetscScalar    *values;
4026 
4027   PetscFunctionBegin;
4028   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4029   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4030     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
4031     PetscInt       *dnz,*onz,sum,mbs,Nbs;
4032     PetscInt       *bindx,rmax=a->rmax,j;
4033 
4034     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4035     mbs = m/bs; Nbs = N/cbs;
4036     if (n == PETSC_DECIDE) {
4037       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
4038     }
4039     /* Check sum(n) = Nbs */
4040     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4041     if (sum != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",sum,Nbs);
4042 
4043     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4044     rstart -= mbs;
4045 
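    /* determine the block nonzero pattern of each local block row: MatGetRow_SeqBAIJ returns
       point (non-blocked) entries, so counts and column indices are divided by bs to obtain
       block quantities for preallocation */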
4046     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
4047     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
4048     for (i=0; i<mbs; i++) {
4049       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
4050       nnz = nnz/bs;
4051       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
4052       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
4053       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
4054     }
4055     ierr = PetscFree(bindx);CHKERRQ(ierr);
4056 
4057     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4058     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4059     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4060     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
4061     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
4062     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4063   }
4064 
4065   /* numeric phase */
4066   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4067   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4068 
4069   for (i=0; i<m; i++) {
4070     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4071     Ii   = i + rstart;
4072     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4073     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4074   }
4075   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4076   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079