xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 1fbc92bbec8f3dfe2fa2c986d447f29fa1d9b654)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
85 {
86   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
87   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
88   PetscErrorCode ierr;
89   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
90 
91   PetscFunctionBegin;
92 #if defined(PETSC_USE_CTABLE)
93   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   for (i=0; i<nbs; i++) {
95     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
96   }
97 #else
98   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
99   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
101 #endif
102   PetscFunctionReturn(0);
103 }
104 
105 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
106   { \
107     brow = row/bs;  \
108     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109     rmax = aimax[brow]; nrow = ailen[brow]; \
110     bcol = col/bs; \
111     ridx = row % bs; cidx = col % bs; \
112     low  = 0; high = nrow; \
113     while (high-low > 3) { \
114       t = (low+high)/2; \
115       if (rp[t] > bcol) high = t; \
116       else              low  = t; \
117     } \
118     for (_i=low; _i<high; _i++) { \
119       if (rp[_i] > bcol) break; \
120       if (rp[_i] == bcol) { \
121         bap = ap +  bs2*_i + bs*cidx + ridx; \
122         if (addv == ADD_VALUES) *bap += value;  \
123         else                    *bap  = value;  \
124         goto a_noinsert; \
125       } \
126     } \
127     if (a->nonew == 1) goto a_noinsert; \
128     PetscCheckFalse(a->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
129     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
130     N = nrow++ - 1;  \
131     /* shift up all the later entries in this row */ \
132     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
133     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
134     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
135     rp[_i]                      = bcol;  \
136     ap[bs2*_i + bs*cidx + ridx] = value;  \
137 a_noinsert:; \
138     ailen[brow] = nrow; \
139   }
140 
141 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
142   { \
143     brow = row/bs;  \
144     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
145     rmax = bimax[brow]; nrow = bilen[brow]; \
146     bcol = col/bs; \
147     ridx = row % bs; cidx = col % bs; \
148     low  = 0; high = nrow; \
149     while (high-low > 3) { \
150       t = (low+high)/2; \
151       if (rp[t] > bcol) high = t; \
152       else              low  = t; \
153     } \
154     for (_i=low; _i<high; _i++) { \
155       if (rp[_i] > bcol) break; \
156       if (rp[_i] == bcol) { \
157         bap = ap +  bs2*_i + bs*cidx + ridx; \
158         if (addv == ADD_VALUES) *bap += value;  \
159         else                    *bap  = value;  \
160         goto b_noinsert; \
161       } \
162     } \
163     if (b->nonew == 1) goto b_noinsert; \
164     PetscCheckFalse(b->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
165     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
166     N = nrow++ - 1;  \
167     /* shift up all the later entries in this row */ \
168     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
169     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
170     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
171     rp[_i]                      = bcol;  \
172     ap[bs2*_i + bs*cidx + ridx] = value;  \
173 b_noinsert:; \
174     bilen[brow] = nrow; \
175   }
176 
177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
180   MatScalar      value;
181   PetscBool      roworiented = baij->roworiented;
182   PetscErrorCode ierr;
183   PetscInt       i,j,row,col;
184   PetscInt       rstart_orig=mat->rmap->rstart;
185   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
187 
188   /* Some Variables required in the macro */
189   Mat         A     = baij->A;
190   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
192   MatScalar   *aa   =a->a;
193 
194   Mat         B     = baij->B;
195   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
197   MatScalar   *ba   =b->a;
198 
199   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
201   MatScalar *ap,*bap;
202 
203   PetscFunctionBegin;
204   for (i=0; i<m; i++) {
205     if (im[i] < 0) continue;
206     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
207     if (im[i] >= rstart_orig && im[i] < rend_orig) {
208       row = im[i] - rstart_orig;
209       for (j=0; j<n; j++) {
210         if (in[j] >= cstart_orig && in[j] < cend_orig) {
211           col = in[j] - cstart_orig;
212           if (roworiented) value = v[i*n+j];
213           else             value = v[i+j*m];
214           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
215         } else if (in[j] < 0) continue;
216         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
217         else {
218           if (mat->was_assembled) {
219             if (!baij->colmap) {
220               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221             }
222 #if defined(PETSC_USE_CTABLE)
223             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224             col  = col - 1;
225 #else
226             col = baij->colmap[in[j]/bs] - 1;
227 #endif
228             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
230               col  =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
232               B    = baij->B;
233               b    = (Mat_SeqBAIJ*)(B)->data;
234               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
235               ba   =b->a;
236             } else PetscCheckFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
237             else col += in[j]%bs;
238           } else col = in[j];
239           if (roworiented) value = v[i*n+j];
240           else             value = v[i+j*m];
241           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
242           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         }
244       }
245     } else {
246       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
247       if (!baij->donotstash) {
248         mat->assembled = PETSC_FALSE;
249         if (roworiented) {
250           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251         } else {
252           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
253         }
254       }
255     }
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261 {
262   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
263   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265   PetscErrorCode    ierr;
266   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267   PetscBool         roworiented=a->roworiented;
268   const PetscScalar *value     = v;
269   MatScalar         *ap,*aa = a->a,*bap;
270 
271   PetscFunctionBegin;
272   rp   = aj + ai[row];
273   ap   = aa + bs2*ai[row];
274   rmax = imax[row];
275   nrow = ailen[row];
276   value = v;
277   low = 0;
278   high = nrow;
279   while (high-low > 7) {
280     t = (low+high)/2;
281     if (rp[t] > col) high = t;
282     else             low  = t;
283   }
284   for (i=low; i<high; i++) {
285     if (rp[i] > col) break;
286     if (rp[i] == col) {
287       bap = ap +  bs2*i;
288       if (roworiented) {
289         if (is == ADD_VALUES) {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] += *value++;
293             }
294           }
295         } else {
296           for (ii=0; ii<bs; ii++) {
297             for (jj=ii; jj<bs2; jj+=bs) {
298               bap[jj] = *value++;
299             }
300           }
301         }
302       } else {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++,value+=bs) {
305             for (jj=0; jj<bs; jj++) {
306               bap[jj] += value[jj];
307             }
308             bap += bs;
309           }
310         } else {
311           for (ii=0; ii<bs; ii++,value+=bs) {
312             for (jj=0; jj<bs; jj++) {
313               bap[jj]  = value[jj];
314             }
315             bap += bs;
316           }
317         }
318       }
319       goto noinsert2;
320     }
321   }
322   if (nonew == 1) goto noinsert2;
323   PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
324   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325   N = nrow++ - 1; high++;
326   /* shift up all the later entries in this row */
327   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329   rp[i] = col;
330   bap   = ap +  bs2*i;
331   if (roworiented) {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=ii; jj<bs2; jj+=bs) {
334         bap[jj] = *value++;
335       }
336     }
337   } else {
338     for (ii=0; ii<bs; ii++) {
339       for (jj=0; jj<bs; jj++) {
340         *bap++ = *value++;
341       }
342     }
343   }
344   noinsert2:;
345   ailen[row] = nrow;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354 {
355   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
356   const PetscScalar *value;
357   MatScalar         *barray     = baij->barray;
358   PetscBool         roworiented = baij->roworiented;
359   PetscErrorCode    ierr;
360   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
361   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
362   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
363 
364   PetscFunctionBegin;
365   if (!barray) {
366     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
367     baij->barray = barray;
368   }
369 
370   if (roworiented) stepval = (n-1)*bs;
371   else stepval = (m-1)*bs;
372 
373   for (i=0; i<m; i++) {
374     if (im[i] < 0) continue;
375     PetscAssertFalse(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
376     if (im[i] >= rstart && im[i] < rend) {
377       row = im[i] - rstart;
378       for (j=0; j<n; j++) {
379         /* If NumCol = 1 then a copy is not required */
380         if ((roworiented) && (n == 1)) {
381           barray = (MatScalar*)v + i*bs2;
382         } else if ((!roworiented) && (m == 1)) {
383           barray = (MatScalar*)v + j*bs2;
384         } else { /* Here a copy is required */
385           if (roworiented) {
386             value = v + (i*(stepval+bs) + j)*bs;
387           } else {
388             value = v + (j*(stepval+bs) + i)*bs;
389           }
390           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
391             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
392             barray += bs;
393           }
394           barray -= bs2;
395         }
396 
397         if (in[j] >= cstart && in[j] < cend) {
398           col  = in[j] - cstart;
399           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
400         } else if (in[j] < 0) continue;
401         else PetscAssertFalse(in[j] >= baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
402         else {
403           if (mat->was_assembled) {
404             if (!baij->colmap) {
405               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
406             }
407 
408 #if defined(PETSC_USE_DEBUG)
409 #if defined(PETSC_USE_CTABLE)
410             { PetscInt data;
411               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
412               PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
413             }
414 #else
415             PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
416 #endif
417 #endif
418 #if defined(PETSC_USE_CTABLE)
419             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
420             col  = (col - 1)/bs;
421 #else
422             col = (baij->colmap[in[j]] - 1)/bs;
423 #endif
424             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
425               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
426               col  =  in[j];
427             } else PetscCheckFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
428           } else col = in[j];
429           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
430         }
431       }
432     } else {
433       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
434       if (!baij->donotstash) {
435         if (roworiented) {
436           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
437         } else {
438           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
439         }
440       }
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 #define HASH_KEY 0.6180339887
447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453   PetscBool      roworiented = baij->roworiented;
454   PetscErrorCode ierr;
455   PetscInt       i,j,row,col;
456   PetscInt       rstart_orig=mat->rmap->rstart;
457   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459   PetscReal      tmp;
460   MatScalar      **HD = baij->hd,value;
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 
463   PetscFunctionBegin;
464   for (i=0; i<m; i++) {
465     if (PetscDefined(USE_DEBUG)) {
466       PetscCheckFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467       PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
468     }
469     row = im[i];
470     if (row >= rstart_orig && row < rend_orig) {
471       for (j=0; j<n; j++) {
472         col = in[j];
473         if (roworiented) value = v[i*n+j];
474         else             value = v[i+j*m];
475         /* Look up PetscInto the Hash Table */
476         key = (row/bs)*Nbs+(col/bs)+1;
477         h1  = HASH(size,key,tmp);
478 
479         idx = h1;
480         if (PetscDefined(USE_DEBUG)) {
481           insert_ct++;
482           total_ct++;
483           if (HT[idx] != key) {
484             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
485             if (idx == size) {
486               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
487               PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
488             }
489           }
490         } else if (HT[idx] != key) {
491           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
492           if (idx == size) {
493             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
494             PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
495           }
496         }
497         /* A HASH table entry is found, so insert the values at the correct address */
498         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
499         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
500       }
501     } else if (!baij->donotstash) {
502       if (roworiented) {
503         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
504       } else {
505         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
506       }
507     }
508   }
509   if (PetscDefined(USE_DEBUG)) {
510     baij->ht_total_ct  += total_ct;
511     baij->ht_insert_ct += insert_ct;
512   }
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
517 {
518   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
519   PetscBool         roworiented = baij->roworiented;
520   PetscErrorCode    ierr;
521   PetscInt          i,j,ii,jj,row,col;
522   PetscInt          rstart=baij->rstartbs;
523   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
524   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
525   PetscReal         tmp;
526   MatScalar         **HD = baij->hd,*baij_a;
527   const PetscScalar *v_t,*value;
528   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
529 
530   PetscFunctionBegin;
531   if (roworiented) stepval = (n-1)*bs;
532   else stepval = (m-1)*bs;
533 
534   for (i=0; i<m; i++) {
535     if (PetscDefined(USE_DEBUG)) {
536       PetscCheckFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]);
537       PetscCheckFalse(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
538     }
539     row = im[i];
540     v_t = v + i*nbs2;
541     if (row >= rstart && row < rend) {
542       for (j=0; j<n; j++) {
543         col = in[j];
544 
545         /* Look up into the Hash Table */
546         key = row*Nbs+col+1;
547         h1  = HASH(size,key,tmp);
548 
549         idx = h1;
550         if (PetscDefined(USE_DEBUG)) {
551           total_ct++;
552           insert_ct++;
553           if (HT[idx] != key) {
554             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
555             if (idx == size) {
556               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
557               PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
558             }
559           }
560         } else if (HT[idx] != key) {
561           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
562           if (idx == size) {
563             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
564             PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
565           }
566         }
567         baij_a = HD[idx];
568         if (roworiented) {
569           /*value = v + i*(stepval+bs)*bs + j*bs;*/
570           /* value = v + (i*(stepval+bs)+j)*bs; */
571           value = v_t;
572           v_t  += bs;
573           if (addv == ADD_VALUES) {
574             for (ii=0; ii<bs; ii++,value+=stepval) {
575               for (jj=ii; jj<bs2; jj+=bs) {
576                 baij_a[jj] += *value++;
577               }
578             }
579           } else {
580             for (ii=0; ii<bs; ii++,value+=stepval) {
581               for (jj=ii; jj<bs2; jj+=bs) {
582                 baij_a[jj] = *value++;
583               }
584             }
585           }
586         } else {
587           value = v + j*(stepval+bs)*bs + i*bs;
588           if (addv == ADD_VALUES) {
589             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590               for (jj=0; jj<bs; jj++) {
591                 baij_a[jj] += *value++;
592               }
593             }
594           } else {
595             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
596               for (jj=0; jj<bs; jj++) {
597                 baij_a[jj] = *value++;
598               }
599             }
600           }
601         }
602       }
603     } else {
604       if (!baij->donotstash) {
605         if (roworiented) {
606           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         } else {
608           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
609         }
610       }
611     }
612   }
613   if (PetscDefined(USE_DEBUG)) {
614     baij->ht_total_ct  += total_ct;
615     baij->ht_insert_ct += insert_ct;
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
621 {
622   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
623   PetscErrorCode ierr;
624   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
625   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
626 
627   PetscFunctionBegin;
628   for (i=0; i<m; i++) {
629     if (idxm[i] < 0) continue; /* negative row */
630     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
631     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
632       row = idxm[i] - bsrstart;
633       for (j=0; j<n; j++) {
634         if (idxn[j] < 0) continue; /* negative column */
635         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
636         if (idxn[j] >= bscstart && idxn[j] < bscend) {
637           col  = idxn[j] - bscstart;
638           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
639         } else {
640           if (!baij->colmap) {
641             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
642           }
643 #if defined(PETSC_USE_CTABLE)
644           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
645           data--;
646 #else
647           data = baij->colmap[idxn[j]/bs]-1;
648 #endif
649           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
650           else {
651             col  = data + idxn[j]%bs;
652             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
653           }
654         }
655       }
656     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
662 {
663   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
664   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
665   PetscErrorCode ierr;
666   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
667   PetscReal      sum = 0.0;
668   MatScalar      *v;
669 
670   PetscFunctionBegin;
671   if (baij->size == 1) {
672     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
673   } else {
674     if (type == NORM_FROBENIUS) {
675       v  = amat->a;
676       nz = amat->nz*bs2;
677       for (i=0; i<nz; i++) {
678         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
679       }
680       v  = bmat->a;
681       nz = bmat->nz*bs2;
682       for (i=0; i<nz; i++) {
683         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
684       }
685       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
686       *nrm = PetscSqrtReal(*nrm);
687     } else if (type == NORM_1) { /* max column sum */
688       PetscReal *tmp,*tmp2;
689       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
690       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
691       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
692       v    = amat->a; jj = amat->j;
693       for (i=0; i<amat->nz; i++) {
694         for (j=0; j<bs; j++) {
695           col = bs*(cstart + *jj) + j; /* column index */
696           for (row=0; row<bs; row++) {
697             tmp[col] += PetscAbsScalar(*v);  v++;
698           }
699         }
700         jj++;
701       }
702       v = bmat->a; jj = bmat->j;
703       for (i=0; i<bmat->nz; i++) {
704         for (j=0; j<bs; j++) {
705           col = bs*garray[*jj] + j;
706           for (row=0; row<bs; row++) {
707             tmp[col] += PetscAbsScalar(*v); v++;
708           }
709         }
710         jj++;
711       }
712       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
713       *nrm = 0.0;
714       for (j=0; j<mat->cmap->N; j++) {
715         if (tmp2[j] > *nrm) *nrm = tmp2[j];
716       }
717       ierr = PetscFree(tmp);CHKERRQ(ierr);
718       ierr = PetscFree(tmp2);CHKERRQ(ierr);
719     } else if (type == NORM_INFINITY) { /* max row sum */
720       PetscReal *sums;
721       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
722       sum  = 0.0;
723       for (j=0; j<amat->mbs; j++) {
724         for (row=0; row<bs; row++) sums[row] = 0.0;
725         v  = amat->a + bs2*amat->i[j];
726         nz = amat->i[j+1]-amat->i[j];
727         for (i=0; i<nz; i++) {
728           for (col=0; col<bs; col++) {
729             for (row=0; row<bs; row++) {
730               sums[row] += PetscAbsScalar(*v); v++;
731             }
732           }
733         }
734         v  = bmat->a + bs2*bmat->i[j];
735         nz = bmat->i[j+1]-bmat->i[j];
736         for (i=0; i<nz; i++) {
737           for (col=0; col<bs; col++) {
738             for (row=0; row<bs; row++) {
739               sums[row] += PetscAbsScalar(*v); v++;
740             }
741           }
742         }
743         for (row=0; row<bs; row++) {
744           if (sums[row] > sum) sum = sums[row];
745         }
746       }
747       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
748       ierr = PetscFree(sums);CHKERRQ(ierr);
749     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
750   }
751   PetscFunctionReturn(0);
752 }
753 
754 /*
755   Creates the hash table, and sets the table
756   This table is created only once.
757   If new entried need to be added to the matrix
758   then the hash table has to be destroyed and
759   recreated.
760 */
761 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
762 {
763   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
764   Mat            A     = baij->A,B=baij->B;
765   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
766   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
767   PetscErrorCode ierr;
768   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
769   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
770   PetscInt       *HT,key;
771   MatScalar      **HD;
772   PetscReal      tmp;
773 #if defined(PETSC_USE_INFO)
774   PetscInt ct=0,max=0;
775 #endif
776 
777   PetscFunctionBegin;
778   if (baij->ht) PetscFunctionReturn(0);
779 
780   baij->ht_size = (PetscInt)(factor*nz);
781   ht_size       = baij->ht_size;
782 
783   /* Allocate Memory for Hash Table */
784   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
785   HD   = baij->hd;
786   HT   = baij->ht;
787 
788   /* Loop Over A */
789   for (i=0; i<a->mbs; i++) {
790     for (j=ai[i]; j<ai[i+1]; j++) {
791       row = i+rstart;
792       col = aj[j]+cstart;
793 
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++) {
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = a->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812   /* Loop Over B */
813   for (i=0; i<b->mbs; i++) {
814     for (j=bi[i]; j<bi[i+1]; j++) {
815       row = i+rstart;
816       col = garray[bj[j]];
817       key = row*Nbs + col + 1;
818       h1  = HASH(ht_size,key,tmp);
819       for (k=0; k<ht_size; k++) {
820         if (!HT[(h1+k)%ht_size]) {
821           HT[(h1+k)%ht_size] = key;
822           HD[(h1+k)%ht_size] = b->a + j*bs2;
823           break;
824 #if defined(PETSC_USE_INFO)
825         } else {
826           ct++;
827 #endif
828         }
829       }
830 #if defined(PETSC_USE_INFO)
831       if (k> max) max = k;
832 #endif
833     }
834   }
835 
836   /* Print Summary */
837 #if defined(PETSC_USE_INFO)
838   for (i=0,j=0; i<ht_size; i++) {
839     if (HT[i]) j++;
840   }
841   ierr = PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max);CHKERRQ(ierr);
842 #endif
843   PetscFunctionReturn(0);
844 }
845 
846 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
849   PetscErrorCode ierr;
850   PetscInt       nstash,reallocs;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
854 
855   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
856   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
857   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
858   ierr = PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
859   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
860   ierr = PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
865 {
866   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
867   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
868   PetscErrorCode ierr;
869   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
870   PetscInt       *row,*col;
871   PetscBool      r1,r2,r3,other_disassembled;
872   MatScalar      *val;
873   PetscMPIInt    n;
874 
875   PetscFunctionBegin;
876   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
877   if (!baij->donotstash && !mat->nooffprocentries) {
878     while (1) {
879       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
880       if (!flg) break;
881 
882       for (i=0; i<n;) {
883         /* Now identify the consecutive vals belonging to the same row */
884         for (j=i,rstart=row[j]; j<n; j++) {
885           if (row[j] != rstart) break;
886         }
887         if (j < n) ncols = j-i;
888         else       ncols = n-i;
889         /* Now assemble all these values with a single function call */
890         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
891         i    = j;
892       }
893     }
894     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
895     /* Now process the block-stash. Since the values are stashed column-oriented,
896        set the roworiented flag to column oriented, and after MatSetValues()
897        restore the original flags */
898     r1 = baij->roworiented;
899     r2 = a->roworiented;
900     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
901 
902     baij->roworiented = PETSC_FALSE;
903     a->roworiented    = PETSC_FALSE;
904 
905     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
906     while (1) {
907       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
908       if (!flg) break;
909 
910       for (i=0; i<n;) {
911         /* Now identify the consecutive vals belonging to the same row */
912         for (j=i,rstart=row[j]; j<n; j++) {
913           if (row[j] != rstart) break;
914         }
915         if (j < n) ncols = j-i;
916         else       ncols = n-i;
917         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
918         i    = j;
919       }
920     }
921     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
922 
923     baij->roworiented = r1;
924     a->roworiented    = r2;
925 
926     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
927   }
928 
929   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
930   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
931 
932   /* determine if any processor has disassembled, if so we must
933      also disassemble ourselfs, in order that we may reassemble. */
934   /*
935      if nonzero structure of submatrix B cannot change then we know that
936      no processor disassembled thus we can skip this stuff
937   */
938   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
939     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
940     if (mat->was_assembled && !other_disassembled) {
941       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
942     }
943   }
944 
945   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
946     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
947   }
948   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
949   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
950 
951 #if defined(PETSC_USE_INFO)
952   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
953     ierr = PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
954 
955     baij->ht_total_ct  = 0;
956     baij->ht_insert_ct = 0;
957   }
958 #endif
959   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
961 
962     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
963     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
964   }
965 
966   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
967 
968   baij->rowvalues = NULL;
969 
970   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
971   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
972     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
973     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
979 #include <petscdraw.h>
980 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
981 {
982   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
983   PetscErrorCode    ierr;
984   PetscMPIInt       rank = baij->rank;
985   PetscInt          bs   = mat->rmap->bs;
986   PetscBool         iascii,isdraw;
987   PetscViewer       sviewer;
988   PetscViewerFormat format;
989 
990   PetscFunctionBegin;
991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
992   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
993   if (iascii) {
994     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
995     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
996       MatInfo info;
997       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
998       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
999       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1000       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
1001                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1002       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1003       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1004       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1005       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1006       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1007       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1009       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1010       PetscFunctionReturn(0);
1011     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1012       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
1013       PetscFunctionReturn(0);
1014     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1015       PetscFunctionReturn(0);
1016     }
1017   }
1018 
1019   if (isdraw) {
1020     PetscDraw draw;
1021     PetscBool isnull;
1022     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1023     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1024     if (isnull) PetscFunctionReturn(0);
1025   }
1026 
1027   {
1028     /* assemble the entire matrix onto first processor. */
1029     Mat         A;
1030     Mat_SeqBAIJ *Aloc;
1031     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1032     MatScalar   *a;
1033     const char  *matname;
1034 
1035     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1036     /* Perhaps this should be the type of mat? */
1037     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1038     if (rank == 0) {
1039       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1040     } else {
1041       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1042     }
1043     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1044     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1045     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1046     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1047 
1048     /* copy over the A part */
1049     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1050     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1051     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1052 
1053     for (i=0; i<mbs; i++) {
1054       rvals[0] = bs*(baij->rstartbs + i);
1055       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1056       for (j=ai[i]; j<ai[i+1]; j++) {
1057         col = (baij->cstartbs+aj[j])*bs;
1058         for (k=0; k<bs; k++) {
1059           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1060           col++; a += bs;
1061         }
1062       }
1063     }
1064     /* copy over the B part */
1065     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1066     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1067     for (i=0; i<mbs; i++) {
1068       rvals[0] = bs*(baij->rstartbs + i);
1069       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1070       for (j=ai[i]; j<ai[i+1]; j++) {
1071         col = baij->garray[aj[j]]*bs;
1072         for (k=0; k<bs; k++) {
1073           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1074           col++; a += bs;
1075         }
1076       }
1077     }
1078     ierr = PetscFree(rvals);CHKERRQ(ierr);
1079     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1080     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1081     /*
1082        Everyone has to call to draw the matrix since the graphics waits are
1083        synchronized across all processors that share the PetscDraw object
1084     */
1085     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1086     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1087     if (rank == 0) {
1088       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1089       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1090     }
1091     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1093     ierr = MatDestroy(&A);CHKERRQ(ierr);
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1099 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1100 {
1101   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1102   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1103   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1104   const PetscInt *garray = aij->garray;
1105   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1106   PetscInt       *rowlens,*colidxs;
1107   PetscScalar    *matvals;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1112 
1113   M  = mat->rmap->N;
1114   N  = mat->cmap->N;
1115   m  = mat->rmap->n;
1116   rs = mat->rmap->rstart;
1117   cs = mat->cmap->rstart;
1118   bs = mat->rmap->bs;
1119   nz = bs*bs*(A->nz + B->nz);
1120 
1121   /* write matrix header */
1122   header[0] = MAT_FILE_CLASSID;
1123   header[1] = M; header[2] = N; header[3] = nz;
1124   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1125   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1126 
1127   /* fill in and store row lengths */
1128   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1129   for (cnt=0, i=0; i<A->mbs; i++)
1130     for (j=0; j<bs; j++)
1131       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1132   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1133   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1134 
1135   /* fill in and store column indices */
1136   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1137   for (cnt=0, i=0; i<A->mbs; i++) {
1138     for (k=0; k<bs; k++) {
1139       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1140         if (garray[B->j[jb]] > cs/bs) break;
1141         for (l=0; l<bs; l++)
1142           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1143       }
1144       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1145         for (l=0; l<bs; l++)
1146           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1147       for (; jb<B->i[i+1]; jb++)
1148         for (l=0; l<bs; l++)
1149           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1150     }
1151   }
1152   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1153   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1154   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1155 
1156   /* fill in and store nonzero values */
1157   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1158   for (cnt=0, i=0; i<A->mbs; i++) {
1159     for (k=0; k<bs; k++) {
1160       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1161         if (garray[B->j[jb]] > cs/bs) break;
1162         for (l=0; l<bs; l++)
1163           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1164       }
1165       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1166         for (l=0; l<bs; l++)
1167           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1168       for (; jb<B->i[i+1]; jb++)
1169         for (l=0; l<bs; l++)
1170           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1171     }
1172   }
1173   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1174   ierr = PetscFree(matvals);CHKERRQ(ierr);
1175 
1176   /* write block size option to the viewer's .info file */
1177   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1182 {
1183   PetscErrorCode ierr;
1184   PetscBool      iascii,isdraw,issocket,isbinary;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1191   if (iascii || isdraw || issocket) {
1192     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193   } else if (isbinary) {
1194     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1200 {
1201   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205 #if defined(PETSC_USE_LOG)
1206   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1207 #endif
1208   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1209   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1210   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1211   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1212 #if defined(PETSC_USE_CTABLE)
1213   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1214 #else
1215   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1216 #endif
1217   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1218   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1219   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1220   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1221   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1222   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1223   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1224   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1225 
1226   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1235 #if defined(PETSC_HAVE_HYPRE)
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1237 #endif
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246   PetscInt       nt;
1247 
1248   PetscFunctionBegin;
1249   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1250   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1252   PetscCheckFalse(nt != A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261 {
1262   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1274 {
1275   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279   /* do nondiagonal part */
1280   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281   /* do local part */
1282   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1283   /* add partial results together */
1284   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1290 {
1291   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   /* do nondiagonal part */
1296   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1297   /* do local part */
1298   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1299   /* add partial results together */
1300   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 /*
1306   This only works correctly for square matrices where the subblock A->A is the
1307    diagonal block
1308 */
1309 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   PetscCheckFalse(A->rmap->N != A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1316   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1321 {
1322   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1327   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1334   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1335   PetscErrorCode ierr;
1336   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1337   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1338   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1339 
1340   PetscFunctionBegin;
1341   PetscCheckFalse(row < brstart || row >= brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1342   PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1343   mat->getrowactive = PETSC_TRUE;
1344 
1345   if (!mat->rowvalues && (idx || v)) {
1346     /*
1347         allocate enough space to hold information from the longest row.
1348     */
1349     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1350     PetscInt    max = 1,mbs = mat->mbs,tmp;
1351     for (i=0; i<mbs; i++) {
1352       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1353       if (max < tmp) max = tmp;
1354     }
1355     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1356   }
1357   lrow = row - brstart;
1358 
1359   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1360   if (!v)   {pvA = NULL; pvB = NULL;}
1361   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1362   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1363   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1364   nztot = nzA + nzB;
1365 
1366   cmap = mat->garray;
1367   if (v  || idx) {
1368     if (nztot) {
1369       /* Sort by increasing column numbers, assuming A and B already sorted */
1370       PetscInt imark = -1;
1371       if (v) {
1372         *v = v_p = mat->rowvalues;
1373         for (i=0; i<nzB; i++) {
1374           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1375           else break;
1376         }
1377         imark = i;
1378         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1379         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1380       }
1381       if (idx) {
1382         *idx = idx_p = mat->rowindices;
1383         if (imark > -1) {
1384           for (i=0; i<imark; i++) {
1385             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1386           }
1387         } else {
1388           for (i=0; i<nzB; i++) {
1389             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1390             else break;
1391           }
1392           imark = i;
1393         }
1394         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1395         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1396       }
1397     } else {
1398       if (idx) *idx = NULL;
1399       if (v)   *v   = NULL;
1400     }
1401   }
1402   *nz  = nztot;
1403   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1404   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1411 
1412   PetscFunctionBegin;
1413   PetscCheckFalse(!baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1414   baij->getrowactive = PETSC_FALSE;
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1419 {
1420   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1425   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1432   Mat            A  = a->A,B = a->B;
1433   PetscErrorCode ierr;
1434   PetscLogDouble isend[5],irecv[5];
1435 
1436   PetscFunctionBegin;
1437   info->block_size = (PetscReal)matin->rmap->bs;
1438 
1439   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1440 
1441   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1442   isend[3] = info->memory;  isend[4] = info->mallocs;
1443 
1444   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1445 
1446   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1447   isend[3] += info->memory;  isend[4] += info->mallocs;
1448 
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1479 {
1480   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   switch (op) {
1485   case MAT_NEW_NONZERO_LOCATIONS:
1486   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1487   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1488   case MAT_KEEP_NONZERO_PATTERN:
1489   case MAT_NEW_NONZERO_LOCATION_ERR:
1490     MatCheckPreallocated(A,1);
1491     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1492     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1493     break;
1494   case MAT_ROW_ORIENTED:
1495     MatCheckPreallocated(A,1);
1496     a->roworiented = flg;
1497 
1498     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1499     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1500     break;
1501   case MAT_FORCE_DIAGONAL_ENTRIES:
1502   case MAT_SORTED_FULL:
1503     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1504     break;
1505   case MAT_IGNORE_OFF_PROC_ENTRIES:
1506     a->donotstash = flg;
1507     break;
1508   case MAT_USE_HASH_TABLE:
1509     a->ht_flag = flg;
1510     a->ht_fact = 1.39;
1511     break;
1512   case MAT_SYMMETRIC:
1513   case MAT_STRUCTURALLY_SYMMETRIC:
1514   case MAT_HERMITIAN:
1515   case MAT_SUBMAT_SINGLEIS:
1516   case MAT_SYMMETRY_ETERNAL:
1517     MatCheckPreallocated(A,1);
1518     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1519     break;
1520   default:
1521     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1527 {
1528   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1529   Mat_SeqBAIJ    *Aloc;
1530   Mat            B;
1531   PetscErrorCode ierr;
1532   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1533   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1534   MatScalar      *a;
1535 
1536   PetscFunctionBegin;
1537   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1538     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1539     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1540     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1541     /* Do not know preallocation information, but must set block size */
1542     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1543   } else {
1544     B = *matout;
1545   }
1546 
1547   /* copy over the A part */
1548   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1549   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1551 
1552   for (i=0; i<mbs; i++) {
1553     rvals[0] = bs*(baij->rstartbs + i);
1554     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1555     for (j=ai[i]; j<ai[i+1]; j++) {
1556       col = (baij->cstartbs+aj[j])*bs;
1557       for (k=0; k<bs; k++) {
1558         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1559 
1560         col++; a += bs;
1561       }
1562     }
1563   }
1564   /* copy over the B part */
1565   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1566   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1567   for (i=0; i<mbs; i++) {
1568     rvals[0] = bs*(baij->rstartbs + i);
1569     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1570     for (j=ai[i]; j<ai[i+1]; j++) {
1571       col = baij->garray[aj[j]]*bs;
1572       for (k=0; k<bs; k++) {
1573         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1574         col++;
1575         a += bs;
1576       }
1577     }
1578   }
1579   ierr = PetscFree(rvals);CHKERRQ(ierr);
1580   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582 
1583   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1584   else {
1585     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1591 {
1592   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1593   Mat            a     = baij->A,b = baij->B;
1594   PetscErrorCode ierr;
1595   PetscInt       s1,s2,s3;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1599   if (rr) {
1600     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1601     PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1602     /* Overlap communication with computation. */
1603     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604   }
1605   if (ll) {
1606     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1607     PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1608     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1609   }
1610   /* scale  the diagonal block */
1611   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1612 
1613   if (rr) {
1614     /* Do a scatter end and then right scale the off-diagonal block */
1615     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1622 {
1623   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1624   PetscInt      *lrows;
1625   PetscInt       r, len;
1626   PetscBool      cong;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   /* get locally owned rows */
1631   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1632   /* fix right hand side if needed */
1633   if (x && b) {
1634     const PetscScalar *xx;
1635     PetscScalar       *bb;
1636 
1637     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1638     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1639     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1640     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1641     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1642   }
1643 
1644   /* actually zap the local rows */
1645   /*
1646         Zero the required rows. If the "diagonal block" of the matrix
1647      is square and the user wishes to set the diagonal we use separate
1648      code so that MatSetValues() is not called for each diagonal allocating
1649      new memory, thus calling lots of mallocs and slowing things down.
1650 
1651   */
1652   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1653   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1654   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1655   if ((diag != 0.0) && cong) {
1656     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1657   } else if (diag != 0.0) {
1658     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1659     PetscCheckFalse(((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1660        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1661     for (r = 0; r < len; ++r) {
1662       const PetscInt row = lrows[r] + A->rmap->rstart;
1663       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   } else {
1668     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1669   }
1670   ierr = PetscFree(lrows);CHKERRQ(ierr);
1671 
1672   /* only change matrix nonzero state if pattern was allowed to be changed */
1673   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1674     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1675     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1681 {
1682   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1683   PetscErrorCode    ierr;
1684   PetscMPIInt       n = A->rmap->n,p = 0;
1685   PetscInt          i,j,k,r,len = 0,row,col,count;
1686   PetscInt          *lrows,*owners = A->rmap->range;
1687   PetscSFNode       *rrows;
1688   PetscSF           sf;
1689   const PetscScalar *xx;
1690   PetscScalar       *bb,*mask;
1691   Vec               xmask,lmask;
1692   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1693   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1694   PetscScalar       *aa;
1695 
1696   PetscFunctionBegin;
1697   /* Create SF where leaves are input rows and roots are owned rows */
1698   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1699   for (r = 0; r < n; ++r) lrows[r] = -1;
1700   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1701   for (r = 0; r < N; ++r) {
1702     const PetscInt idx   = rows[r];
1703     PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
1704     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1705       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1706     }
1707     rrows[r].rank  = p;
1708     rrows[r].index = rows[r] - owners[p];
1709   }
1710   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1711   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1712   /* Collect flags for rows to be zeroed */
1713   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1714   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1715   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1716   /* Compress and put in row numbers */
1717   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1718   /* zero diagonal part of matrix */
1719   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1720   /* handle off diagonal part of matrix */
1721   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1722   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1723   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1724   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1725   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1726   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1729   if (x) {
1730     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1731     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1733     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1734   }
1735   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1736   /* remove zeroed rows of off diagonal matrix */
1737   for (i = 0; i < len; ++i) {
1738     row   = lrows[i];
1739     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1740     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1741     for (k = 0; k < count; ++k) {
1742       aa[0] = 0.0;
1743       aa   += bs;
1744     }
1745   }
1746   /* loop over all elements of off process part of matrix zeroing removed columns*/
1747   for (i = 0; i < l->B->rmap->N; ++i) {
1748     row = i/bs;
1749     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1750       for (k = 0; k < bs; ++k) {
1751         col = bs*baij->j[j] + k;
1752         if (PetscAbsScalar(mask[col])) {
1753           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1754           if (x) bb[i] -= aa[0]*xx[col];
1755           aa[0] = 0.0;
1756         }
1757       }
1758     }
1759   }
1760   if (x) {
1761     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1762     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1763   }
1764   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1765   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1766   ierr = PetscFree(lrows);CHKERRQ(ierr);
1767 
1768   /* only change matrix nonzero state if pattern was allowed to be changed */
1769   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1770     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1771     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1787 
1788 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1789 {
1790   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1791   Mat            a,b,c,d;
1792   PetscBool      flg;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   a = matA->A; b = matA->B;
1797   c = matB->A; d = matB->B;
1798 
1799   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1800   if (flg) {
1801     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1802   }
1803   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1808 {
1809   PetscErrorCode ierr;
1810   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1811   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1812 
1813   PetscFunctionBegin;
1814   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1815   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1816     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1819     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1820   }
1821   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1838   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1839   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1847 {
1848   PetscErrorCode ierr;
1849   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1850   PetscBLASInt   bnz,one=1;
1851   Mat_SeqBAIJ    *x,*y;
1852   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1853 
1854   PetscFunctionBegin;
1855   if (str == SAME_NONZERO_PATTERN) {
1856     PetscScalar alpha = a;
1857     x    = (Mat_SeqBAIJ*)xx->A->data;
1858     y    = (Mat_SeqBAIJ*)yy->A->data;
1859     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1860     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1861     x    = (Mat_SeqBAIJ*)xx->B->data;
1862     y    = (Mat_SeqBAIJ*)yy->B->data;
1863     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1864     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1865     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1866   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1867     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1868   } else {
1869     Mat      B;
1870     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1871     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1872     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1873     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1874     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1875     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1876     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1877     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1878     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1879     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1880     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1881     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1882     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1883     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
1884     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1885     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1891 {
1892 #if defined(PETSC_USE_COMPLEX)
1893   PetscErrorCode ierr;
1894   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
1898   ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
1899 #else
1900   PetscFunctionBegin;
1901 #endif
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1912   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1917 {
1918   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1923   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1928 {
1929   PetscErrorCode ierr;
1930   IS             iscol_local;
1931   PetscInt       csize;
1932 
1933   PetscFunctionBegin;
1934   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1935   if (call == MAT_REUSE_MATRIX) {
1936     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1937     PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1938   } else {
1939     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1940   }
1941   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1942   if (call == MAT_INITIAL_MATRIX) {
1943     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1944     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 /*
1950   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1951   in local and then by concatenating the local matrices the end result.
1952   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1953   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1954 */
1955 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1956 {
1957   PetscErrorCode ierr;
1958   PetscMPIInt    rank,size;
1959   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1960   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1961   Mat            M,Mreuse;
1962   MatScalar      *vwork,*aa;
1963   MPI_Comm       comm;
1964   IS             isrow_new, iscol_new;
1965   Mat_SeqBAIJ    *aij;
1966 
1967   PetscFunctionBegin;
1968   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1969   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
1970   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1971   /* The compression and expansion should be avoided. Doesn't point
1972      out errors, might change the indices, hence buggey */
1973   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1974   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1975 
1976   if (call ==  MAT_REUSE_MATRIX) {
1977     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1978     PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1979     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1980   } else {
1981     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1982   }
1983   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1984   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1985   /*
1986       m - number of local rows
1987       n - number of columns (same on all processors)
1988       rstart - first row in new global matrix generated
1989   */
1990   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1991   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1992   m    = m/bs;
1993   n    = n/bs;
1994 
1995   if (call == MAT_INITIAL_MATRIX) {
1996     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1997     ii  = aij->i;
1998     jj  = aij->j;
1999 
2000     /*
2001         Determine the number of non-zeros in the diagonal and off-diagonal
2002         portions of the matrix in order to do correct preallocation
2003     */
2004 
2005     /* first get start and end of "diagonal" columns */
2006     if (csize == PETSC_DECIDE) {
2007       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2008       if (mglobal == n*bs) { /* square matrix */
2009         nlocal = m;
2010       } else {
2011         nlocal = n/size + ((n % size) > rank);
2012       }
2013     } else {
2014       nlocal = csize/bs;
2015     }
2016     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
2017     rstart = rend - nlocal;
2018     PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
2019 
2020     /* next, compute all the lengths */
2021     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2022     for (i=0; i<m; i++) {
2023       jend = ii[i+1] - ii[i];
2024       olen = 0;
2025       dlen = 0;
2026       for (j=0; j<jend; j++) {
2027         if (*jj < rstart || *jj >= rend) olen++;
2028         else dlen++;
2029         jj++;
2030       }
2031       olens[i] = olen;
2032       dlens[i] = dlen;
2033     }
2034     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2035     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2036     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2037     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2038     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2039     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2040   } else {
2041     PetscInt ml,nl;
2042 
2043     M    = *newmat;
2044     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2045     PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2046     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2047     /*
2048          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2049        rather than the slower MatSetValues().
2050     */
2051     M->was_assembled = PETSC_TRUE;
2052     M->assembled     = PETSC_FALSE;
2053   }
2054   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2055   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2056   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2057   ii   = aij->i;
2058   jj   = aij->j;
2059   aa   = aij->a;
2060   for (i=0; i<m; i++) {
2061     row   = rstart/bs + i;
2062     nz    = ii[i+1] - ii[i];
2063     cwork = jj;     jj += nz;
2064     vwork = aa;     aa += nz*bs*bs;
2065     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2066   }
2067 
2068   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   *newmat = M;
2071 
2072   /* save submatrix used in processor for next request */
2073   if (call ==  MAT_INITIAL_MATRIX) {
2074     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2075     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2081 {
2082   MPI_Comm       comm,pcomm;
2083   PetscInt       clocal_size,nrows;
2084   const PetscInt *rows;
2085   PetscMPIInt    size;
2086   IS             crowp,lcolp;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2091   /* make a collective version of 'rowp' */
2092   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2093   if (pcomm==comm) {
2094     crowp = rowp;
2095   } else {
2096     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2097     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2098     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2099     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2100   }
2101   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2102   /* make a local version of 'colp' */
2103   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2105   if (size==1) {
2106     lcolp = colp;
2107   } else {
2108     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2109   }
2110   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2111   /* now we just get the submatrix */
2112   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2113   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2114   /* clean up */
2115   if (pcomm!=comm) {
2116     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2117   }
2118   if (size>1) {
2119     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2125 {
2126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2127   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2128 
2129   PetscFunctionBegin;
2130   if (nghosts) *nghosts = B->nbs;
2131   if (ghosts) *ghosts = baij->garray;
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2136 {
2137   Mat            B;
2138   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2139   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2140   Mat_SeqAIJ     *b;
2141   PetscErrorCode ierr;
2142   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2143   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2144   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2145 
2146   PetscFunctionBegin;
2147   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
2148   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
2149 
2150   /* ----------------------------------------------------------------
2151      Tell every processor the number of nonzeros per row
2152   */
2153   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2154   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2155     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2156   }
2157   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2158   displs    = recvcounts + size;
2159   for (i=0; i<size; i++) {
2160     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2161     displs[i]     = A->rmap->range[i]/bs;
2162   }
2163   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2164   /* ---------------------------------------------------------------
2165      Create the sequential matrix of the same type as the local block diagonal
2166   */
2167   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2168   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2169   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2170   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2171   b    = (Mat_SeqAIJ*)B->data;
2172 
2173   /*--------------------------------------------------------------------
2174     Copy my part of matrix column indices over
2175   */
2176   sendcount  = ad->nz + bd->nz;
2177   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2178   a_jsendbuf = ad->j;
2179   b_jsendbuf = bd->j;
2180   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2181   cnt        = 0;
2182   for (i=0; i<n; i++) {
2183 
2184     /* put in lower diagonal portion */
2185     m = bd->i[i+1] - bd->i[i];
2186     while (m > 0) {
2187       /* is it above diagonal (in bd (compressed) numbering) */
2188       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2189       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2190       m--;
2191     }
2192 
2193     /* put in diagonal portion */
2194     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2195       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2196     }
2197 
2198     /* put in upper diagonal portion */
2199     while (m-- > 0) {
2200       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2201     }
2202   }
2203   PetscCheckFalse(cnt != sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);
2204 
2205   /*--------------------------------------------------------------------
2206     Gather all column indices to all processors
2207   */
2208   for (i=0; i<size; i++) {
2209     recvcounts[i] = 0;
2210     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2211       recvcounts[i] += lens[j];
2212     }
2213   }
2214   displs[0] = 0;
2215   for (i=1; i<size; i++) {
2216     displs[i] = displs[i-1] + recvcounts[i-1];
2217   }
2218   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2219   /*--------------------------------------------------------------------
2220     Assemble the matrix into useable form (note numerical values not yet set)
2221   */
2222   /* set the b->ilen (length of each row) values */
2223   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2224   /* set the b->i indices */
2225   b->i[0] = 0;
2226   for (i=1; i<=A->rmap->N/bs; i++) {
2227     b->i[i] = b->i[i-1] + lens[i-1];
2228   }
2229   ierr = PetscFree(lens);CHKERRQ(ierr);
2230   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2231   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2232   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2233 
2234   if (A->symmetric) {
2235     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2236   } else if (A->hermitian) {
2237     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2238   } else if (A->structurally_symmetric) {
2239     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2240   }
2241   *newmat = B;
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2246 {
2247   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2248   PetscErrorCode ierr;
2249   Vec            bb1 = NULL;
2250 
2251   PetscFunctionBegin;
2252   if (flag == SOR_APPLY_UPPER) {
2253     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2254     PetscFunctionReturn(0);
2255   }
2256 
2257   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2258     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2259   }
2260 
2261   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2262     if (flag & SOR_ZERO_INITIAL_GUESS) {
2263       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2264       its--;
2265     }
2266 
2267     while (its--) {
2268       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2269       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2270 
2271       /* update rhs: bb1 = bb - B*x */
2272       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2273       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2274 
2275       /* local sweep */
2276       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2277     }
2278   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2279     if (flag & SOR_ZERO_INITIAL_GUESS) {
2280       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2281       its--;
2282     }
2283     while (its--) {
2284       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2285       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2286 
2287       /* update rhs: bb1 = bb - B*x */
2288       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2289       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2290 
2291       /* local sweep */
2292       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2293     }
2294   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2295     if (flag & SOR_ZERO_INITIAL_GUESS) {
2296       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2297       its--;
2298     }
2299     while (its--) {
2300       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2301       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2302 
2303       /* update rhs: bb1 = bb - B*x */
2304       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2305       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2306 
2307       /* local sweep */
2308       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2309     }
2310   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2311 
2312   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2317 {
2318   PetscErrorCode ierr;
2319   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2320   PetscInt       m,N,i,*garray = aij->garray;
2321   PetscInt       ib,jb,bs = A->rmap->bs;
2322   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2323   MatScalar      *a_val = a_aij->a;
2324   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2325   MatScalar      *b_val = b_aij->a;
2326   PetscReal      *work;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr);
2330   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2331   if (type == NORM_2) {
2332     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2333       for (jb=0; jb<bs; jb++) {
2334         for (ib=0; ib<bs; ib++) {
2335           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2336           a_val++;
2337         }
2338       }
2339     }
2340     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2341       for (jb=0; jb<bs; jb++) {
2342         for (ib=0; ib<bs; ib++) {
2343           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2344           b_val++;
2345         }
2346       }
2347     }
2348   } else if (type == NORM_1) {
2349     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2350       for (jb=0; jb<bs; jb++) {
2351         for (ib=0; ib<bs; ib++) {
2352           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2353           a_val++;
2354         }
2355       }
2356     }
2357     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2358       for (jb=0; jb<bs; jb++) {
2359        for (ib=0; ib<bs; ib++) {
2360           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2361           b_val++;
2362         }
2363       }
2364     }
2365   } else if (type == NORM_INFINITY) {
2366     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2367       for (jb=0; jb<bs; jb++) {
2368         for (ib=0; ib<bs; ib++) {
2369           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2370           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2371           a_val++;
2372         }
2373       }
2374     }
2375     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2376       for (jb=0; jb<bs; jb++) {
2377         for (ib=0; ib<bs; ib++) {
2378           int col = garray[b_aij->j[i]] * bs + jb;
2379           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2380           b_val++;
2381         }
2382       }
2383     }
2384   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2385     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2386       for (jb=0; jb<bs; jb++) {
2387         for (ib=0; ib<bs; ib++) {
2388           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2389           a_val++;
2390         }
2391       }
2392     }
2393     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2394       for (jb=0; jb<bs; jb++) {
2395        for (ib=0; ib<bs; ib++) {
2396           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2397           b_val++;
2398         }
2399       }
2400     }
2401   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2402     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2403       for (jb=0; jb<bs; jb++) {
2404         for (ib=0; ib<bs; ib++) {
2405           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2406           a_val++;
2407         }
2408       }
2409     }
2410     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2411       for (jb=0; jb<bs; jb++) {
2412        for (ib=0; ib<bs; ib++) {
2413           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2414           b_val++;
2415         }
2416       }
2417     }
2418   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2419   if (type == NORM_INFINITY) {
2420     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2421   } else {
2422     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2423   }
2424   ierr = PetscFree(work);CHKERRQ(ierr);
2425   if (type == NORM_2) {
2426     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2427   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2428     for (i=0; i<N; i++) reductions[i] /= m;
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2434 {
2435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2440   A->factorerrortype             = a->A->factorerrortype;
2441   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2442   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2447 {
2448   PetscErrorCode ierr;
2449   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2450   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2451 
2452   PetscFunctionBegin;
2453   if (!Y->preallocated) {
2454     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2455   } else if (!aij->nz) {
2456     PetscInt nonew = aij->nonew;
2457     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2458     aij->nonew = nonew;
2459   }
2460   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2465 {
2466   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2467   PetscErrorCode ierr;
2468 
2469   PetscFunctionBegin;
2470   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2471   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2472   if (d) {
2473     PetscInt rstart;
2474     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2475     *d += rstart/A->rmap->bs;
2476 
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2482 {
2483   PetscFunctionBegin;
2484   *a = ((Mat_MPIBAIJ*)A->data)->A;
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 /* -------------------------------------------------------------------*/
2489 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2490                                        MatGetRow_MPIBAIJ,
2491                                        MatRestoreRow_MPIBAIJ,
2492                                        MatMult_MPIBAIJ,
2493                                 /* 4*/ MatMultAdd_MPIBAIJ,
2494                                        MatMultTranspose_MPIBAIJ,
2495                                        MatMultTransposeAdd_MPIBAIJ,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                 /*10*/ NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        MatSOR_MPIBAIJ,
2503                                        MatTranspose_MPIBAIJ,
2504                                 /*15*/ MatGetInfo_MPIBAIJ,
2505                                        MatEqual_MPIBAIJ,
2506                                        MatGetDiagonal_MPIBAIJ,
2507                                        MatDiagonalScale_MPIBAIJ,
2508                                        MatNorm_MPIBAIJ,
2509                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2510                                        MatAssemblyEnd_MPIBAIJ,
2511                                        MatSetOption_MPIBAIJ,
2512                                        MatZeroEntries_MPIBAIJ,
2513                                 /*24*/ MatZeroRows_MPIBAIJ,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                 /*29*/ MatSetUp_MPIBAIJ,
2519                                        NULL,
2520                                        NULL,
2521                                        MatGetDiagonalBlock_MPIBAIJ,
2522                                        NULL,
2523                                 /*34*/ MatDuplicate_MPIBAIJ,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                 /*39*/ MatAXPY_MPIBAIJ,
2529                                        MatCreateSubMatrices_MPIBAIJ,
2530                                        MatIncreaseOverlap_MPIBAIJ,
2531                                        MatGetValues_MPIBAIJ,
2532                                        MatCopy_MPIBAIJ,
2533                                 /*44*/ NULL,
2534                                        MatScale_MPIBAIJ,
2535                                        MatShift_MPIBAIJ,
2536                                        NULL,
2537                                        MatZeroRowsColumns_MPIBAIJ,
2538                                 /*49*/ NULL,
2539                                        NULL,
2540                                        NULL,
2541                                        NULL,
2542                                        NULL,
2543                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2544                                        NULL,
2545                                        MatSetUnfactored_MPIBAIJ,
2546                                        MatPermute_MPIBAIJ,
2547                                        MatSetValuesBlocked_MPIBAIJ,
2548                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2549                                        MatDestroy_MPIBAIJ,
2550                                        MatView_MPIBAIJ,
2551                                        NULL,
2552                                        NULL,
2553                                 /*64*/ NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        NULL,
2557                                        NULL,
2558                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                 /*74*/ NULL,
2564                                        MatFDColoringApply_BAIJ,
2565                                        NULL,
2566                                        NULL,
2567                                        NULL,
2568                                 /*79*/ NULL,
2569                                        NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        MatLoad_MPIBAIJ,
2573                                 /*84*/ NULL,
2574                                        NULL,
2575                                        NULL,
2576                                        NULL,
2577                                        NULL,
2578                                 /*89*/ NULL,
2579                                        NULL,
2580                                        NULL,
2581                                        NULL,
2582                                        NULL,
2583                                 /*94*/ NULL,
2584                                        NULL,
2585                                        NULL,
2586                                        NULL,
2587                                        NULL,
2588                                 /*99*/ NULL,
2589                                        NULL,
2590                                        NULL,
2591                                        MatConjugate_MPIBAIJ,
2592                                        NULL,
2593                                 /*104*/NULL,
2594                                        MatRealPart_MPIBAIJ,
2595                                        MatImaginaryPart_MPIBAIJ,
2596                                        NULL,
2597                                        NULL,
2598                                 /*109*/NULL,
2599                                        NULL,
2600                                        NULL,
2601                                        NULL,
2602                                        MatMissingDiagonal_MPIBAIJ,
2603                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2604                                        NULL,
2605                                        MatGetGhosts_MPIBAIJ,
2606                                        NULL,
2607                                        NULL,
2608                                 /*119*/NULL,
2609                                        NULL,
2610                                        NULL,
2611                                        NULL,
2612                                        MatGetMultiProcBlock_MPIBAIJ,
2613                                 /*124*/NULL,
2614                                        MatGetColumnReductions_MPIBAIJ,
2615                                        MatInvertBlockDiagonal_MPIBAIJ,
2616                                        NULL,
2617                                        NULL,
2618                                /*129*/ NULL,
2619                                        NULL,
2620                                        NULL,
2621                                        NULL,
2622                                        NULL,
2623                                /*134*/ NULL,
2624                                        NULL,
2625                                        NULL,
2626                                        NULL,
2627                                        NULL,
2628                                /*139*/ MatSetBlockSizes_Default,
2629                                        NULL,
2630                                        NULL,
2631                                        MatFDColoringSetUp_MPIXAIJ,
2632                                        NULL,
2633                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2634 };
2635 
2636 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2637 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2638 
2639 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2640 {
2641   PetscInt       m,rstart,cstart,cend;
2642   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2643   const PetscInt *JJ    =NULL;
2644   PetscScalar    *values=NULL;
2645   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2646   PetscErrorCode ierr;
2647   PetscBool      nooffprocentries;
2648 
2649   PetscFunctionBegin;
2650   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2651   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2652   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2653   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2654   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2655   m      = B->rmap->n/bs;
2656   rstart = B->rmap->rstart/bs;
2657   cstart = B->cmap->rstart/bs;
2658   cend   = B->cmap->rend/bs;
2659 
2660   PetscCheckFalse(ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2661   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2662   for (i=0; i<m; i++) {
2663     nz = ii[i+1] - ii[i];
2664     PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2665     nz_max = PetscMax(nz_max,nz);
2666     dlen   = 0;
2667     olen   = 0;
2668     JJ     = jj + ii[i];
2669     for (j=0; j<nz; j++) {
2670       if (*JJ < cstart || *JJ >= cend) olen++;
2671       else dlen++;
2672       JJ++;
2673     }
2674     d_nnz[i] = dlen;
2675     o_nnz[i] = olen;
2676   }
2677   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2678   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2679 
2680   values = (PetscScalar*)V;
2681   if (!values) {
2682     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2683   }
2684   for (i=0; i<m; i++) {
2685     PetscInt          row    = i + rstart;
2686     PetscInt          ncols  = ii[i+1] - ii[i];
2687     const PetscInt    *icols = jj + ii[i];
2688     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2689       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2690       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2691     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2692       PetscInt j;
2693       for (j=0; j<ncols; j++) {
2694         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2695         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2696       }
2697     }
2698   }
2699 
2700   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2701   nooffprocentries    = B->nooffprocentries;
2702   B->nooffprocentries = PETSC_TRUE;
2703   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2704   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705   B->nooffprocentries = nooffprocentries;
2706 
2707   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 /*@C
2712    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2713 
2714    Collective
2715 
2716    Input Parameters:
2717 +  B - the matrix
2718 .  bs - the block size
2719 .  i - the indices into j for the start of each local row (starts with zero)
2720 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2721 -  v - optional values in the matrix
2722 
2723    Level: advanced
2724 
2725    Notes:
2726     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2727    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2728    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2729    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2730    block column and the second index is over columns within a block.
2731 
2732    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2733 
2734 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2735 @*/
2736 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2737 {
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2742   PetscValidType(B,1);
2743   PetscValidLogicalCollectiveInt(B,bs,2);
2744   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2749 {
2750   Mat_MPIBAIJ    *b;
2751   PetscErrorCode ierr;
2752   PetscInt       i;
2753   PetscMPIInt    size;
2754 
2755   PetscFunctionBegin;
2756   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2757   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2758   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2759   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2760 
2761   if (d_nnz) {
2762     for (i=0; i<B->rmap->n/bs; i++) {
2763       PetscCheckFalse(d_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
2764     }
2765   }
2766   if (o_nnz) {
2767     for (i=0; i<B->rmap->n/bs; i++) {
2768       PetscCheckFalse(o_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
2769     }
2770   }
2771 
2772   b      = (Mat_MPIBAIJ*)B->data;
2773   b->bs2 = bs*bs;
2774   b->mbs = B->rmap->n/bs;
2775   b->nbs = B->cmap->n/bs;
2776   b->Mbs = B->rmap->N/bs;
2777   b->Nbs = B->cmap->N/bs;
2778 
2779   for (i=0; i<=b->size; i++) {
2780     b->rangebs[i] = B->rmap->range[i]/bs;
2781   }
2782   b->rstartbs = B->rmap->rstart/bs;
2783   b->rendbs   = B->rmap->rend/bs;
2784   b->cstartbs = B->cmap->rstart/bs;
2785   b->cendbs   = B->cmap->rend/bs;
2786 
2787 #if defined(PETSC_USE_CTABLE)
2788   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2789 #else
2790   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2791 #endif
2792   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2793   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2794   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2795 
2796   /* Because the B will have been resized we simply destroy it and create a new one each time */
2797   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2798   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2799   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2800   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2801   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2802   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2803 
2804   if (!B->preallocated) {
2805     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2806     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2807     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2808     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2809     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2810   }
2811 
2812   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2813   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2814   B->preallocated  = PETSC_TRUE;
2815   B->was_assembled = PETSC_FALSE;
2816   B->assembled     = PETSC_FALSE;
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2821 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2822 
2823 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2824 {
2825   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2826   PetscErrorCode ierr;
2827   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2828   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2829   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2830 
2831   PetscFunctionBegin;
2832   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2833   ii[0] = 0;
2834   for (i=0; i<M; i++) {
2835     PetscCheckFalse((id[i+1] - id[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
2836     PetscCheckFalse((io[i+1] - io[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
2837     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2838     /* remove one from count of matrix has diagonal */
2839     for (j=id[i]; j<id[i+1]; j++) {
2840       if (jd[j] == i) {ii[i+1]--;break;}
2841     }
2842   }
2843   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2844   cnt  = 0;
2845   for (i=0; i<M; i++) {
2846     for (j=io[i]; j<io[i+1]; j++) {
2847       if (garray[jo[j]] > rstart) break;
2848       jj[cnt++] = garray[jo[j]];
2849     }
2850     for (k=id[i]; k<id[i+1]; k++) {
2851       if (jd[k] != i) {
2852         jj[cnt++] = rstart + jd[k];
2853       }
2854     }
2855     for (; j<io[i+1]; j++) {
2856       jj[cnt++] = garray[jo[j]];
2857     }
2858   }
2859   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2864 
2865 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2866 
2867 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2868 {
2869   PetscErrorCode ierr;
2870   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2871   Mat            B;
2872   Mat_MPIAIJ     *b;
2873 
2874   PetscFunctionBegin;
2875   PetscCheckFalse(!A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2876 
2877   if (reuse == MAT_REUSE_MATRIX) {
2878     B = *newmat;
2879   } else {
2880     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2881     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2882     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2883     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2884     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2885     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2886   }
2887   b = (Mat_MPIAIJ*) B->data;
2888 
2889   if (reuse == MAT_REUSE_MATRIX) {
2890     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2891     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2892   } else {
2893     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2894     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2895     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2896     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2897     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2898     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2899     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2900   }
2901   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2902   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2903 
2904   if (reuse == MAT_INPLACE_MATRIX) {
2905     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2906   } else {
2907    *newmat = B;
2908   }
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 /*MC
2913    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2914 
2915    Options Database Keys:
2916 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2917 . -mat_block_size <bs> - set the blocksize used to store the matrix
2918 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2919 - -mat_use_hash_table <fact>
2920 
2921    Level: beginner
2922 
2923    Notes:
2924     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2925     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2926 
2927 .seealso: MatCreateBAIJ
2928 M*/
2929 
2930 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2931 
2932 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2933 {
2934   Mat_MPIBAIJ    *b;
2935   PetscErrorCode ierr;
2936   PetscBool      flg = PETSC_FALSE;
2937 
2938   PetscFunctionBegin;
2939   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2940   B->data = (void*)b;
2941 
2942   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2943   B->assembled = PETSC_FALSE;
2944 
2945   B->insertmode = NOT_SET_VALUES;
2946   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
2947   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2948 
2949   /* build local table of row and column ownerships */
2950   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2951 
2952   /* build cache for off array entries formed */
2953   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2954 
2955   b->donotstash  = PETSC_FALSE;
2956   b->colmap      = NULL;
2957   b->garray      = NULL;
2958   b->roworiented = PETSC_TRUE;
2959 
2960   /* stuff used in block assembly */
2961   b->barray = NULL;
2962 
2963   /* stuff used for matrix vector multiply */
2964   b->lvec  = NULL;
2965   b->Mvctx = NULL;
2966 
2967   /* stuff for MatGetRow() */
2968   b->rowindices   = NULL;
2969   b->rowvalues    = NULL;
2970   b->getrowactive = PETSC_FALSE;
2971 
2972   /* hash table stuff */
2973   b->ht           = NULL;
2974   b->hd           = NULL;
2975   b->ht_size      = 0;
2976   b->ht_flag      = PETSC_FALSE;
2977   b->ht_fact      = 0;
2978   b->ht_total_ct  = 0;
2979   b->ht_insert_ct = 0;
2980 
2981   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2982   b->ijonly = PETSC_FALSE;
2983 
2984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2986   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2987 #if defined(PETSC_HAVE_HYPRE)
2988   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2989 #endif
2990   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2991   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2992   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2993   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2994   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2995   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2996   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
2997   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2998 
2999   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3000   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3001   if (flg) {
3002     PetscReal fact = 1.39;
3003     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3004     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3005     if (fact <= 1.0) fact = 1.39;
3006     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3007     ierr = PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact);CHKERRQ(ierr);
3008   }
3009   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 /*MC
3014    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3015 
3016    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3017    and MATMPIBAIJ otherwise.
3018 
3019    Options Database Keys:
3020 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3021 
3022   Level: beginner
3023 
3024 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3025 M*/
3026 
3027 /*@C
3028    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3029    (block compressed row).  For good matrix assembly performance
3030    the user should preallocate the matrix storage by setting the parameters
3031    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3032    performance can be increased by more than a factor of 50.
3033 
3034    Collective on Mat
3035 
3036    Input Parameters:
3037 +  B - the matrix
3038 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3039           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3040 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3041            submatrix  (same for all local rows)
3042 .  d_nnz - array containing the number of block nonzeros in the various block rows
3043            of the in diagonal portion of the local (possibly different for each block
3044            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3045            set it even if it is zero.
3046 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3047            submatrix (same for all local rows).
3048 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3049            off-diagonal portion of the local submatrix (possibly different for
3050            each block row) or NULL.
3051 
3052    If the *_nnz parameter is given then the *_nz parameter is ignored
3053 
3054    Options Database Keys:
3055 +   -mat_block_size - size of the blocks to use
3056 -   -mat_use_hash_table <fact>
3057 
3058    Notes:
3059    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3060    than it must be used on all processors that share the object for that argument.
3061 
3062    Storage Information:
3063    For a square global matrix we define each processor's diagonal portion
3064    to be its local rows and the corresponding columns (a square submatrix);
3065    each processor's off-diagonal portion encompasses the remainder of the
3066    local matrix (a rectangular submatrix).
3067 
3068    The user can specify preallocated storage for the diagonal part of
3069    the local submatrix with either d_nz or d_nnz (not both).  Set
3070    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3071    memory allocation.  Likewise, specify preallocated storage for the
3072    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3073 
3074    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3075    the figure below we depict these three local rows and all columns (0-11).
3076 
3077 .vb
3078            0 1 2 3 4 5 6 7 8 9 10 11
3079           --------------------------
3080    row 3  |o o o d d d o o o o  o  o
3081    row 4  |o o o d d d o o o o  o  o
3082    row 5  |o o o d d d o o o o  o  o
3083           --------------------------
3084 .ve
3085 
3086    Thus, any entries in the d locations are stored in the d (diagonal)
3087    submatrix, and any entries in the o locations are stored in the
3088    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3089    stored simply in the MATSEQBAIJ format for compressed row storage.
3090 
3091    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3092    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3093    In general, for PDE problems in which most nonzeros are near the diagonal,
3094    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3095    or you will get TERRIBLE performance; see the users' manual chapter on
3096    matrices.
3097 
3098    You can call MatGetInfo() to get information on how effective the preallocation was;
3099    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3100    You can also run with the option -info and look for messages with the string
3101    malloc in them to see if additional memory allocation was needed.
3102 
3103    Level: intermediate
3104 
3105 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3106 @*/
3107 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3108 {
3109   PetscErrorCode ierr;
3110 
3111   PetscFunctionBegin;
3112   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3113   PetscValidType(B,1);
3114   PetscValidLogicalCollectiveInt(B,bs,2);
3115   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 /*@C
3120    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3121    (block compressed row).  For good matrix assembly performance
3122    the user should preallocate the matrix storage by setting the parameters
3123    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3124    performance can be increased by more than a factor of 50.
3125 
3126    Collective
3127 
3128    Input Parameters:
3129 +  comm - MPI communicator
3130 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3131           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3132 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3133            This value should be the same as the local size used in creating the
3134            y vector for the matrix-vector product y = Ax.
3135 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3136            This value should be the same as the local size used in creating the
3137            x vector for the matrix-vector product y = Ax.
3138 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3139 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3140 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3141            submatrix  (same for all local rows)
3142 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3143            of the in diagonal portion of the local (possibly different for each block
3144            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3145            and set it even if it is zero.
3146 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3147            submatrix (same for all local rows).
3148 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3149            off-diagonal portion of the local submatrix (possibly different for
3150            each block row) or NULL.
3151 
3152    Output Parameter:
3153 .  A - the matrix
3154 
3155    Options Database Keys:
3156 +   -mat_block_size - size of the blocks to use
3157 -   -mat_use_hash_table <fact>
3158 
3159    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3160    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3161    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3162 
3163    Notes:
3164    If the *_nnz parameter is given then the *_nz parameter is ignored
3165 
3166    A nonzero block is any block that as 1 or more nonzeros in it
3167 
3168    The user MUST specify either the local or global matrix dimensions
3169    (possibly both).
3170 
3171    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3172    than it must be used on all processors that share the object for that argument.
3173 
3174    Storage Information:
3175    For a square global matrix we define each processor's diagonal portion
3176    to be its local rows and the corresponding columns (a square submatrix);
3177    each processor's off-diagonal portion encompasses the remainder of the
3178    local matrix (a rectangular submatrix).
3179 
3180    The user can specify preallocated storage for the diagonal part of
3181    the local submatrix with either d_nz or d_nnz (not both).  Set
3182    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3183    memory allocation.  Likewise, specify preallocated storage for the
3184    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3185 
3186    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3187    the figure below we depict these three local rows and all columns (0-11).
3188 
3189 .vb
3190            0 1 2 3 4 5 6 7 8 9 10 11
3191           --------------------------
3192    row 3  |o o o d d d o o o o  o  o
3193    row 4  |o o o d d d o o o o  o  o
3194    row 5  |o o o d d d o o o o  o  o
3195           --------------------------
3196 .ve
3197 
3198    Thus, any entries in the d locations are stored in the d (diagonal)
3199    submatrix, and any entries in the o locations are stored in the
3200    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3201    stored simply in the MATSEQBAIJ format for compressed row storage.
3202 
3203    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3204    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3205    In general, for PDE problems in which most nonzeros are near the diagonal,
3206    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3207    or you will get TERRIBLE performance; see the users' manual chapter on
3208    matrices.
3209 
3210    Level: intermediate
3211 
3212 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3213 @*/
3214 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3215 {
3216   PetscErrorCode ierr;
3217   PetscMPIInt    size;
3218 
3219   PetscFunctionBegin;
3220   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3221   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3222   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3223   if (size > 1) {
3224     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3225     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3226   } else {
3227     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3228     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3229   }
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3234 {
3235   Mat            mat;
3236   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3237   PetscErrorCode ierr;
3238   PetscInt       len=0;
3239 
3240   PetscFunctionBegin;
3241   *newmat = NULL;
3242   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3243   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3244   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3245 
3246   mat->factortype   = matin->factortype;
3247   mat->preallocated = PETSC_TRUE;
3248   mat->assembled    = PETSC_TRUE;
3249   mat->insertmode   = NOT_SET_VALUES;
3250 
3251   a             = (Mat_MPIBAIJ*)mat->data;
3252   mat->rmap->bs = matin->rmap->bs;
3253   a->bs2        = oldmat->bs2;
3254   a->mbs        = oldmat->mbs;
3255   a->nbs        = oldmat->nbs;
3256   a->Mbs        = oldmat->Mbs;
3257   a->Nbs        = oldmat->Nbs;
3258 
3259   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3260   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3261 
3262   a->size         = oldmat->size;
3263   a->rank         = oldmat->rank;
3264   a->donotstash   = oldmat->donotstash;
3265   a->roworiented  = oldmat->roworiented;
3266   a->rowindices   = NULL;
3267   a->rowvalues    = NULL;
3268   a->getrowactive = PETSC_FALSE;
3269   a->barray       = NULL;
3270   a->rstartbs     = oldmat->rstartbs;
3271   a->rendbs       = oldmat->rendbs;
3272   a->cstartbs     = oldmat->cstartbs;
3273   a->cendbs       = oldmat->cendbs;
3274 
3275   /* hash table stuff */
3276   a->ht           = NULL;
3277   a->hd           = NULL;
3278   a->ht_size      = 0;
3279   a->ht_flag      = oldmat->ht_flag;
3280   a->ht_fact      = oldmat->ht_fact;
3281   a->ht_total_ct  = 0;
3282   a->ht_insert_ct = 0;
3283 
3284   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3285   if (oldmat->colmap) {
3286 #if defined(PETSC_USE_CTABLE)
3287     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3288 #else
3289     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3290     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3291     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3292 #endif
3293   } else a->colmap = NULL;
3294 
3295   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3296     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3297     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3298     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3299   } else a->garray = NULL;
3300 
3301   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3302   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3303   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3304   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3305   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3306 
3307   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3308   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3309   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3310   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3311   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3312   *newmat = mat;
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3317 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3318 {
3319   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3320   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3321   PetscScalar    *matvals;
3322   PetscErrorCode ierr;
3323 
3324   PetscFunctionBegin;
3325   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3326 
3327   /* read in matrix header */
3328   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3329   PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3330   M  = header[1]; N = header[2]; nz = header[3];
3331   PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3332   PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3333   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3334 
3335   /* set block sizes from the viewer's .info file */
3336   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3337   /* set local sizes if not set already */
3338   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3339   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3340   /* set global sizes if not set already */
3341   if (mat->rmap->N < 0) mat->rmap->N = M;
3342   if (mat->cmap->N < 0) mat->cmap->N = N;
3343   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3344   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3345 
3346   /* check if the matrix sizes are correct */
3347   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3348   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3349   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3350   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3351   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
3352   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
3353   mbs = m/bs; nbs = n/bs;
3354 
3355   /* read in row lengths and build row indices */
3356   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3357   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3358   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3359   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3360   PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3361 
3362   /* read in column indices and matrix values */
3363   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3364   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3365   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3366 
3367   { /* preallocate matrix storage */
3368     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3369     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3370     PetscBool  sbaij,done;
3371     PetscInt   *d_nnz,*o_nnz;
3372 
3373     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3374     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3375     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3376     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3377     for (i=0; i<mbs; i++) {
3378       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3379       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3380       for (k=0; k<bs; k++) {
3381         PetscInt row = bs*i + k;
3382         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3383           PetscInt col = colidxs[j];
3384           if (!sbaij || col >= row) {
3385             if (col >= cs && col < ce) {
3386               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3387             } else {
3388               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3389               if (done) o_nnz[i]++;
3390             }
3391           }
3392         }
3393       }
3394     }
3395     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3396     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3397     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3398     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3399     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3400   }
3401 
3402   /* store matrix values */
3403   for (i=0; i<m; i++) {
3404     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3405     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3406   }
3407 
3408   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3409   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3410   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3411   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3416 {
3417   PetscErrorCode ierr;
3418   PetscBool      isbinary;
3419 
3420   PetscFunctionBegin;
3421   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3422   PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3423   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3424   PetscFunctionReturn(0);
3425 }
3426 
3427 /*@
3428    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3429 
3430    Input Parameters:
3431 +  mat  - the matrix
3432 -  fact - factor
3433 
3434    Not Collective, each process can use a different factor
3435 
3436    Level: advanced
3437 
3438   Notes:
3439    This can also be set by the command line option: -mat_use_hash_table <fact>
3440 
3441 .seealso: MatSetOption()
3442 @*/
3443 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3444 {
3445   PetscErrorCode ierr;
3446 
3447   PetscFunctionBegin;
3448   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3453 {
3454   Mat_MPIBAIJ *baij;
3455 
3456   PetscFunctionBegin;
3457   baij          = (Mat_MPIBAIJ*)mat->data;
3458   baij->ht_fact = fact;
3459   PetscFunctionReturn(0);
3460 }
3461 
3462 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3463 {
3464   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3465   PetscBool      flg;
3466   PetscErrorCode ierr;
3467 
3468   PetscFunctionBegin;
3469   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3470   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3471   if (Ad)     *Ad     = a->A;
3472   if (Ao)     *Ao     = a->B;
3473   if (colmap) *colmap = a->garray;
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 /*
3478     Special version for direct calls from Fortran (to eliminate two function call overheads
3479 */
3480 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3481 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3482 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3483 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3484 #endif
3485 
3486 /*@C
3487   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3488 
3489   Collective on Mat
3490 
3491   Input Parameters:
3492 + mat - the matrix
3493 . min - number of input rows
3494 . im - input rows
3495 . nin - number of input columns
3496 . in - input columns
3497 . v - numerical values input
3498 - addvin - INSERT_VALUES or ADD_VALUES
3499 
3500   Notes:
3501     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3502 
3503   Level: advanced
3504 
3505 .seealso:   MatSetValuesBlocked()
3506 @*/
3507 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3508 {
3509   /* convert input arguments to C version */
3510   Mat        mat  = *matin;
3511   PetscInt   m    = *min, n = *nin;
3512   InsertMode addv = *addvin;
3513 
3514   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3515   const MatScalar *value;
3516   MatScalar       *barray     = baij->barray;
3517   PetscBool       roworiented = baij->roworiented;
3518   PetscErrorCode  ierr;
3519   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3520   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3521   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3522 
3523   PetscFunctionBegin;
3524   /* tasks normally handled by MatSetValuesBlocked() */
3525   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3526   else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3527   PetscCheckFalse(mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3528   if (mat->assembled) {
3529     mat->was_assembled = PETSC_TRUE;
3530     mat->assembled     = PETSC_FALSE;
3531   }
3532   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3533 
3534   if (!barray) {
3535     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3536     baij->barray = barray;
3537   }
3538 
3539   if (roworiented) stepval = (n-1)*bs;
3540   else stepval = (m-1)*bs;
3541 
3542   for (i=0; i<m; i++) {
3543     if (im[i] < 0) continue;
3544     PetscAssertFalse(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
3545     if (im[i] >= rstart && im[i] < rend) {
3546       row = im[i] - rstart;
3547       for (j=0; j<n; j++) {
3548         /* If NumCol = 1 then a copy is not required */
3549         if ((roworiented) && (n == 1)) {
3550           barray = (MatScalar*)v + i*bs2;
3551         } else if ((!roworiented) && (m == 1)) {
3552           barray = (MatScalar*)v + j*bs2;
3553         } else { /* Here a copy is required */
3554           if (roworiented) {
3555             value = v + i*(stepval+bs)*bs + j*bs;
3556           } else {
3557             value = v + j*(stepval+bs)*bs + i*bs;
3558           }
3559           for (ii=0; ii<bs; ii++,value+=stepval) {
3560             for (jj=0; jj<bs; jj++) {
3561               *barray++ = *value++;
3562             }
3563           }
3564           barray -=bs2;
3565         }
3566 
3567         if (in[j] >= cstart && in[j] < cend) {
3568           col  = in[j] - cstart;
3569           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3570         } else if (in[j] < 0) continue;
3571         else PetscAssertFalse(in[j] >= baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
3572         else {
3573           if (mat->was_assembled) {
3574             if (!baij->colmap) {
3575               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3576             }
3577 
3578 #if defined(PETSC_USE_DEBUG)
3579 #if defined(PETSC_USE_CTABLE)
3580             { PetscInt data;
3581               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3582               PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3583             }
3584 #else
3585             PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3586 #endif
3587 #endif
3588 #if defined(PETSC_USE_CTABLE)
3589             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3590             col  = (col - 1)/bs;
3591 #else
3592             col = (baij->colmap[in[j]] - 1)/bs;
3593 #endif
3594             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3595               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3596               col  =  in[j];
3597             }
3598           } else col = in[j];
3599           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3600         }
3601       }
3602     } else {
3603       if (!baij->donotstash) {
3604         if (roworiented) {
3605           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3606         } else {
3607           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3608         }
3609       }
3610     }
3611   }
3612 
3613   /* task normally handled by MatSetValuesBlocked() */
3614   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 /*@
3619      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3620          CSR format the local rows.
3621 
3622    Collective
3623 
3624    Input Parameters:
3625 +  comm - MPI communicator
3626 .  bs - the block size, only a block size of 1 is supported
3627 .  m - number of local rows (Cannot be PETSC_DECIDE)
3628 .  n - This value should be the same as the local size used in creating the
3629        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3630        calculated if N is given) For square matrices n is almost always m.
3631 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3632 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3633 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3634 .   j - column indices
3635 -   a - matrix values
3636 
3637    Output Parameter:
3638 .   mat - the matrix
3639 
3640    Level: intermediate
3641 
3642    Notes:
3643        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3644      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3645      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3646 
3647      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3648      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3649      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3650      with column-major ordering within blocks.
3651 
3652        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3653 
3654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3655           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3656 @*/
3657 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3658 {
3659   PetscErrorCode ierr;
3660 
3661   PetscFunctionBegin;
3662   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3663   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3664   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3665   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3666   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3667   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3668   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3669   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3670   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3671   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3672   PetscFunctionReturn(0);
3673 }
3674 
3675 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3676 {
3677   PetscErrorCode ierr;
3678   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3679   PetscInt       *indx;
3680   PetscScalar    *values;
3681 
3682   PetscFunctionBegin;
3683   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3684   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3685     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3686     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3687     PetscInt       *bindx,rmax=a->rmax,j;
3688     PetscMPIInt    rank,size;
3689 
3690     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3691     mbs = m/bs; Nbs = N/cbs;
3692     if (n == PETSC_DECIDE) {
3693       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3694     }
3695     nbs = n/cbs;
3696 
3697     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3698     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3699 
3700     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3701     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
3702     if (rank == size-1) {
3703       /* Check sum(nbs) = Nbs */
3704       PetscCheckFalse(__end != Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
3705     }
3706 
3707     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3708     for (i=0; i<mbs; i++) {
3709       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3710       nnz = nnz/bs;
3711       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3712       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3713       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3714     }
3715     ierr = PetscFree(bindx);CHKERRQ(ierr);
3716 
3717     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3718     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3719     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3720     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3721     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3722     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3723     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3724     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3725   }
3726 
3727   /* numeric phase */
3728   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3729   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3730 
3731   for (i=0; i<m; i++) {
3732     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3733     Ii   = i + rstart;
3734     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3735     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3736   }
3737   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3738   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3739   PetscFunctionReturn(0);
3740 }
3741