xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision bfcb38ea38335faa6e7f8d97f6bc6ce9aa2a1dd1)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
92   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
93 #endif
94   PetscFunctionReturn(0);
95 }
96 
/*
   MatSetValues_SeqBAIJ_A_Private - inlined insertion of a single scalar value
   into the diagonal block (A) of an MPIBAIJ matrix, used by
   MatSetValues_MPIBAIJ().  row/col are local POINT indices; orow/ocol are the
   global indices used only in error messages.  The macro expects the caller
   to provide the working variables (rp, ap, rmax, nrow, low, high, t, _i, ii,
   N, brow, bcol, ridx, cidx, bap, bs, bs2, ierr) and the SeqBAIJ arrays
   (a, aa, ai, aj, aimax, ailen) in scope.  A short binary search narrows the
   row to a window of at most 3 entries, then a linear scan finds or inserts
   the block column; new entries shift later columns up by one slot.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
135 
/*
   MatSetValues_SeqBAIJ_B_Private - inlined insertion of a single scalar value
   into the OFF-DIAGONAL block (B) of an MPIBAIJ matrix; mirror image of
   MatSetValues_SeqBAIJ_A_Private() operating on the b-prefixed SeqBAIJ arrays
   (b, ba, bi, bj, bimax, bilen).  The same caller-provided working variables
   are required, and orow/ocol are the global indices used in error messages.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
173 
/*
   MatSetValues_MPIBAIJ - Inserts or adds an m x n logically dense block of
   values, addressed by global POINT (not block) indices.

   Locally owned rows are routed to the diagonal block (baij->A) or the
   off-diagonal block (baij->B) through the inlined macros above; rows owned
   by other processes are placed in the stash for communication at assembly.

   Input Parameters:
+  mat  - the matrix
.  m,im - number of rows and their global point indices (negative = skip)
.  n,in - number of columns and their global point indices (negative = skip)
.  v    - the values, row or column oriented per baij->roworiented
-  addv - either ADD_VALUES or INSERT_VALUES
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  /* owned point ranges of the row and column layouts */
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row: convert to local point index */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column lies in the off-diagonal block B */
          if (mat->was_assembled) {
            /* translate the global column to B's compressed local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not seen before assembly: revert B to global
                 numbering so the new entry can be accommodated */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
261 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds ONE bs x bs block of
   values at block position (row,col) of a SeqBAIJ matrix; inlined here for
   use by MatSetValuesBlocked_MPIBAIJ().

   v holds the bs*bs values of the block; a->roworiented selects whether they
   are read row-major or column-major (BAIJ storage itself is column-major
   within each block).  orow/ocol are the global block indices, used only in
   error messages.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];       /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];   /* values of this block row */
  rmax = imax[row];
  nrow = ailen[row];
  /* (redundant: value was already initialized to v above) */
  value = v;
  low = 0;
  high = nrow;
  /* binary search narrows to a window of at most 7 entries */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  /* linear scan of the remaining window for the block column */
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major: stride through column-major storage */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input is column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  /* block column not present: insert a new block unless disallowed */
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* fill the freshly inserted block */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
355 
356 /*
357     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
358     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
359 */
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts or adds m x n blocks of size bs x bs,
   addressed by global BLOCK row/column indices.

   Each (i,j) block is staged (copied, when strided) into baij->barray in the
   layout expected by MatSetValuesBlocked_SeqBAIJ_Inlined(), then routed to
   the diagonal (A) or off-diagonal (B) sequential block.  Block rows owned by
   other processes go to the block stash for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  /* rstart/rend/cstart/cend are the owned ranges in BLOCK units */
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs x bs staging buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance in v between consecutive rows (resp. columns) of one block */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* (aliases barray directly into the caller's v; const is cast away
           but the data is only read below) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;   /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal part: translate to B's compressed block numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* previously unseen off-process block column: revert B to
                 global numbering to accommodate it */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
456 
/* Multiplicative hashing: HASH_KEY is the golden-ratio conjugate; HASH maps
   key to a slot in [0,size) via the fractional part of key*HASH_KEY.  tmp is
   a caller-supplied PetscReal scratch variable. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ():
   inserts or adds individual values, addressed by global POINT indices, by
   looking up the enclosing (block row, block column) key in the hash table
   built by MatCreateHashTable_MPIBAIJ_Private().  No new nonzero blocks can
   be created here; a missing key is an error.  Off-process rows are stashed.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping to the front of the table */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* NOTE(review): total_ct/insert_ct were seeded with the running totals
     above, so "+=" here re-adds the previous totals (counters grow faster
     than the number of probes) — verify the intended statistics semantics */
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
531 
532 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
533 {
534   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
535   PetscBool         roworiented = baij->roworiented;
536   PetscErrorCode    ierr;
537   PetscInt          i,j,ii,jj,row,col;
538   PetscInt          rstart=baij->rstartbs;
539   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
540   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
541   PetscReal         tmp;
542   MatScalar         **HD = baij->hd,*baij_a;
543   const PetscScalar *v_t,*value;
544 #if defined(PETSC_USE_DEBUG)
545   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
546 #endif
547 
548   PetscFunctionBegin;
549   if (roworiented) stepval = (n-1)*bs;
550   else stepval = (m-1)*bs;
551 
552   for (i=0; i<m; i++) {
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
555     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
556 #endif
557     row = im[i];
558     v_t = v + i*nbs2;
559     if (row >= rstart && row < rend) {
560       for (j=0; j<n; j++) {
561         col = in[j];
562 
563         /* Look up into the Hash Table */
564         key = row*Nbs+col+1;
565         h1  = HASH(size,key,tmp);
566 
567         idx = h1;
568 #if defined(PETSC_USE_DEBUG)
569         total_ct++;
570         insert_ct++;
571         if (HT[idx] != key) {
572           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
573           if (idx == size) {
574             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
575             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
576           }
577         }
578 #else
579         if (HT[idx] != key) {
580           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
581           if (idx == size) {
582             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
583             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
584           }
585         }
586 #endif
587         baij_a = HD[idx];
588         if (roworiented) {
589           /*value = v + i*(stepval+bs)*bs + j*bs;*/
590           /* value = v + (i*(stepval+bs)+j)*bs; */
591           value = v_t;
592           v_t  += bs;
593           if (addv == ADD_VALUES) {
594             for (ii=0; ii<bs; ii++,value+=stepval) {
595               for (jj=ii; jj<bs2; jj+=bs) {
596                 baij_a[jj] += *value++;
597               }
598             }
599           } else {
600             for (ii=0; ii<bs; ii++,value+=stepval) {
601               for (jj=ii; jj<bs2; jj+=bs) {
602                 baij_a[jj] = *value++;
603               }
604             }
605           }
606         } else {
607           value = v + j*(stepval+bs)*bs + i*bs;
608           if (addv == ADD_VALUES) {
609             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
610               for (jj=0; jj<bs; jj++) {
611                 baij_a[jj] += *value++;
612               }
613             }
614           } else {
615             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
616               for (jj=0; jj<bs; jj++) {
617                 baij_a[jj] = *value++;
618               }
619             }
620           }
621         }
622       }
623     } else {
624       if (!baij->donotstash) {
625         if (roworiented) {
626           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633 #if defined(PETSC_USE_DEBUG)
634   baij->ht_total_ct  += total_ct;
635   baij->ht_insert_ct += insert_ct;
636 #endif
637   PetscFunctionReturn(0);
638 }
639 
/*
   MatGetValues_MPIBAIJ - Retrieves an m x n logically dense block of values,
   addressed by global POINT indices.  Only locally owned rows are supported;
   columns outside the owned range are fetched from the off-diagonal block B
   via the colmap, and entries in blocks not stored locally come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      /* locally owned row: convert to local point index */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block: translate via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 (absent) or stale map entry: block not stored locally */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
680 
/*
   MatNorm_MPIBAIJ - Computes the Frobenius norm, 1-norm (max column sum) or
   infinity norm (max row sum) of an MPIBAIJ matrix.  On a single process the
   computation is delegated to the sequential diagonal block; otherwise local
   contributions from both blocks are combined with an MPI reduction.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared magnitudes of all stored entries in both blocks */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (row block start); this
         equals the diagonal block's column start only when the row and column
         layouts coincide — verify for non-square layouts (cf. cstartbs) */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* accumulate |entry| per global column: diagonal block first */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      /* then the off-diagonal block, mapping through garray[] */
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* for each block row, accumulate per-point-row sums over both blocks */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
772 
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
/*
  Builds an open-addressed hash table (linear probing) that maps the global
  block position of every stored block, encoded as key = row*Nbs + col + 1,
  to the address of that block's values (HD).  The +1 keeps key 0 free to
  mean "empty slot".  Both the diagonal (A) and off-diagonal (B) parts are
  entered.  `factor` oversizes the table relative to the nonzero block count
  to keep probe chains short.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;  /* ct: total collisions, max: longest probe chain (diagnostics only) */
#endif

  PetscFunctionBegin;
  /* table already built: nothing to do */
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc so all slots start empty/zero) */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A (diagonal block: local block columns are offset by cstart) */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probe from h1 until an empty slot is found */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B (off-diagonal block: garray maps local block columns to global) */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
864 
865 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
866 {
867   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
868   PetscErrorCode ierr;
869   PetscInt       nstash,reallocs;
870 
871   PetscFunctionBegin;
872   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
873 
874   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
875   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
876   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
877   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
878   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
879   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
/*
  Completes assembly: receives the stashed off-process entries whose sends
  were started in MatAssemblyBegin_MPIBAIJ(), inserts them locally, then
  assembles the on-diagonal (A) and off-diagonal (B) sequential blocks and
  handles disassembly/reassembly bookkeeping.  Collective on the matrix's
  communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message holds parallel row/col/val arrays */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block is bs2 values long, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* optionally build the hash table and switch to the faster _HT setvalues variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free work arrays that MatGetRow_MPIBAIJ() may have allocated */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
996 
997 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
998 #include <petscdraw.h>
/*
  Views the matrix through an ASCII, draw, or socket viewer.  For the ASCII
  INFO formats only summary information is printed; otherwise the whole
  matrix is gathered onto rank 0 (as a temporary MPIBAIJ with all rows on
  rank 0) and viewed there.  Collective on the matrix's communicator.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics, printed in rank order via the synchronized printf */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* insert the block one point column at a time (blocks stored column-major) */
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix lives entirely in its diagonal block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1116 
1117 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1118 {
1119   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1120   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1121   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1122   PetscErrorCode ierr;
1123   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1124   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1125   int            fd;
1126   PetscScalar    *column_values;
1127   FILE           *file;
1128   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1129   PetscInt       message_count,flowcontrolcount;
1130 
1131   PetscFunctionBegin;
1132   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1133   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1134   nz   = bs2*(A->nz + B->nz);
1135   rlen = mat->rmap->n;
1136   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1137   if (!rank) {
1138     header[0] = MAT_FILE_CLASSID;
1139     header[1] = mat->rmap->N;
1140     header[2] = mat->cmap->N;
1141 
1142     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1143     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1144     /* get largest number of rows any processor has */
1145     range = mat->rmap->range;
1146     for (i=1; i<size; i++) {
1147       rlen = PetscMax(rlen,range[i+1] - range[i]);
1148     }
1149   } else {
1150     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1151   }
1152 
1153   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1154   /* compute lengths of each row  */
1155   for (i=0; i<a->mbs; i++) {
1156     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1157   }
1158   /* store the row lengths to the file */
1159   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1160   if (!rank) {
1161     MPI_Status status;
1162     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1163     rlen = (range[1] - range[0])/bs;
1164     for (i=0; i<rlen; i++) {
1165       for (j=0; j<bs; j++) {
1166         row_lens[i*bs+j] = bs*crow_lens[i];
1167       }
1168     }
1169     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     for (i=1; i<size; i++) {
1171       rlen = (range[i+1] - range[i])/bs;
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       for (k=0; k<rlen; k++) {
1175         for (j=0; j<bs; j++) {
1176           row_lens[k*bs+j] = bs*crow_lens[k];
1177         }
1178       }
1179       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1182     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1183   } else {
1184     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1185     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1187   }
1188   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1189 
1190   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1191      information needed to make it for each row from a block row. This does require more communication but still not more than
1192      the communication needed for the nonzero values  */
1193   nzmax = nz; /*  space a largest processor needs */
1194   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1196   cnt   = 0;
1197   for (i=0; i<a->mbs; i++) {
1198     pcnt = cnt;
1199     for (j=B->i[i]; j<B->i[i+1]; j++) {
1200       if ((col = garray[B->j[j]]) > cstart) break;
1201       for (l=0; l<bs; l++) {
1202         column_indices[cnt++] = bs*col+l;
1203       }
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       for (l=0; l<bs; l++) {
1207         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1208       }
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1213       }
1214     }
1215     len = cnt - pcnt;
1216     for (k=1; k<bs; k++) {
1217       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1218       cnt += len;
1219     }
1220   }
1221   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1222 
1223   /* store the columns to the file */
1224   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1225   if (!rank) {
1226     MPI_Status status;
1227     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     for (i=1; i<size; i++) {
1229       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1230       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1231       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the numerical values */
1244   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<a->mbs; i++) {
1247     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1248     for (j=B->i[i]; j<B->i[i+1]; j++) {
1249       if (garray[B->j[j]] > cstart) break;
1250       for (l=0; l<bs; l++) {
1251         for (ll=0; ll<bs; ll++) {
1252           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1253         }
1254       }
1255       cnt += bs;
1256     }
1257     for (k=A->i[i]; k<A->i[i+1]; k++) {
1258       for (l=0; l<bs; l++) {
1259         for (ll=0; ll<bs; ll++) {
1260           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1261         }
1262       }
1263       cnt += bs;
1264     }
1265     for (; j<B->i[i+1]; j++) {
1266       for (l=0; l<bs; l++) {
1267         for (ll=0; ll<bs; ll++) {
1268           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1269         }
1270       }
1271       cnt += bs;
1272     }
1273     cnt += (bs-1)*rlen;
1274   }
1275   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1276 
1277   /* store the column values to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     MPI_Status status;
1281     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1282     for (i=1; i<size; i++) {
1283       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1284       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1285       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1292     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_values);CHKERRQ(ierr);
1296 
1297   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1298   if (file) {
1299     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1305 {
1306   PetscErrorCode ierr;
1307   PetscBool      iascii,isdraw,issocket,isbinary;
1308 
1309   PetscFunctionBegin;
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1313   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1314   if (iascii || isdraw || issocket) {
1315     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1316   } else if (isbinary) {
1317     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
/*
  Destroys an MPIBAIJ matrix: frees the sequential diagonal/off-diagonal
  blocks, the communication objects, work arrays and hash table, and clears
  the type name and all composed type-specific methods.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap's representation depends on the PETSC_USE_CTABLE build option */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the methods composed on the object when the type was set */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1365 
1366 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370   PetscInt       nt;
1371 
1372   PetscFunctionBegin;
1373   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1374   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1375   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1376   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1377   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1379   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
/*
  Computes zz = yy + A*xx, overlapping the ghost-value scatter of xx into
  a->lvec with the multiply-add by the on-diagonal block a->A; the
  off-diagonal block a->B contribution is then accumulated into zz.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1396 
/*
  Computes yy = A^T * xx.  The off-diagonal block contribution is computed
  into the local work vector a->lvec and then scattered (reverse, additive)
  into the globally distributed result.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1412 
/*
  Computes zz = yy + A^T * xx, following the same pattern as
  MatMultTranspose_MPIBAIJ(): off-diagonal contribution into a->lvec, local
  multiply-add, then an additive reverse scatter into zz.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1428 
1429 /*
1430   This only works correctly for square matrices where the subblock A->A is the
1431    diagonal block
1432 */
1433 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1440   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1445 {
1446   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1451   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
/*
  Returns the column indices (global numbering) and values of a locally
  owned point row.  Entries from the off-diagonal part B (translated to
  global indices through garray) are merged with the diagonal-part entries
  so that the output columns are sorted increasingly, assuming A's and B's
  rows are themselves sorted.  Must be paired with MatRestoreRow_MPIBAIJ().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    /* reused across calls; freed in MatAssemblyEnd_MPIBAIJ() */
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  /* only request the arrays the caller asked for from the sequential getrow */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark is the index of the first B entry whose global block column is
         at or beyond the diagonal block; B entries before it go first,
         then all A entries, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1531 
1532 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1535 
1536   PetscFunctionBegin;
1537   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1538   baij->getrowactive = PETSC_FALSE;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1543 {
1544   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1549   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1554 {
1555   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1556   Mat            A  = a->A,B = a->B;
1557   PetscErrorCode ierr;
1558   PetscReal      isend[5],irecv[5];
1559 
1560   PetscFunctionBegin;
1561   info->block_size = (PetscReal)matin->rmap->bs;
1562 
1563   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1564 
1565   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1566   isend[3] = info->memory;  isend[4] = info->mallocs;
1567 
1568   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1569 
1570   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1571   isend[3] += info->memory;  isend[4] += info->mallocs;
1572 
1573   if (flag == MAT_LOCAL) {
1574     info->nz_used      = isend[0];
1575     info->nz_allocated = isend[1];
1576     info->nz_unneeded  = isend[2];
1577     info->memory       = isend[3];
1578     info->mallocs      = isend[4];
1579   } else if (flag == MAT_GLOBAL_MAX) {
1580     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1581 
1582     info->nz_used      = irecv[0];
1583     info->nz_allocated = irecv[1];
1584     info->nz_unneeded  = irecv[2];
1585     info->memory       = irecv[3];
1586     info->mallocs      = irecv[4];
1587   } else if (flag == MAT_GLOBAL_SUM) {
1588     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1589 
1590     info->nz_used      = irecv[0];
1591     info->nz_allocated = irecv[1];
1592     info->nz_unneeded  = irecv[2];
1593     info->memory       = irecv[3];
1594     info->mallocs      = irecv[4];
1595   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1596   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1597   info->fill_ratio_needed = 0;
1598   info->factor_mallocs    = 0;
1599   PetscFunctionReturn(0);
1600 }
1601 
/*
   MatSetOption_MPIBAIJ - Applies a matrix option, forwarding it to the
   sequential diagonal (a->A) and/or off-diagonal (a->B) blocks and/or
   recording it in the parallel-level data structure as appropriate.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* nonzero-pattern options are forwarded to both local blocks */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remembered here too, for the parallel MatSetValues path */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* handled entirely at the parallel level: off-process entries are dropped */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* hash-table expansion factor is reset to this value */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    /* symmetry-type options are forwarded only to the diagonal block */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1648 
/*
   MatTranspose_MPIBAIJ - Forms the transpose by inserting every local block
   entry of baij->A (diagonal part) and baij->B (off-diagonal part) at its
   transposed global position via MatSetValues_MPIBAIJ, then assembling.
   No preallocation information is known for the result, so insertion may
   allocate as it goes.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with row/column dimensions swapped */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals[] = the bs consecutive global point rows of block row i;
       they become the columns of the transposed entries */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs; /* first global point column of this block */
      for (k=0; k<bs; k++) {
        /* insert the next bs stored values of this block as one row of B */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps local block column of the off-diagonal part to global */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: fold the new matrix into A, destroying B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1712 
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr); either
   vector may be NULL to skip that side.  The scatter of rr into the ghosted
   work vector baij->lvec is started before scaling the local blocks so the
   communication overlaps with computation.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling needs only local values, so the off-diagonal block can be done immediately */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1743 
1744 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1745 {
1746   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1747   PetscInt      *lrows;
1748   PetscInt       r, len;
1749   PetscBool      cong;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   /* get locally owned rows */
1754   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1755   /* fix right hand side if needed */
1756   if (x && b) {
1757     const PetscScalar *xx;
1758     PetscScalar       *bb;
1759 
1760     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1761     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1762     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1763     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1764     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1765   }
1766 
1767   /* actually zap the local rows */
1768   /*
1769         Zero the required rows. If the "diagonal block" of the matrix
1770      is square and the user wishes to set the diagonal we use separate
1771      code so that MatSetValues() is not called for each diagonal allocating
1772      new memory, thus calling lots of mallocs and slowing things down.
1773 
1774   */
1775   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1776   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1777   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1778   if ((diag != 0.0) && cong) {
1779     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1780   } else if (diag != 0.0) {
1781     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1782     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1783        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1784     for (r = 0; r < len; ++r) {
1785       const PetscInt row = lrows[r] + A->rmap->rstart;
1786       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1787     }
1788     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790   } else {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1792   }
1793   ierr = PetscFree(lrows);CHKERRQ(ierr);
1794 
1795   /* only change matrix nonzero state if pattern was allowed to be changed */
1796   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1797     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1798     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
/*
   MatZeroRowsColumns_MPIBAIJ - Zeros the listed global rows AND the matching
   columns.  Rows may be requested by any process; a PetscSF reduces the
   requests to the owning process.  The diagonal block is handled by the
   sequential MatZeroRowsColumns(); the off-diagonal block is zeroed here
   using a 0/1 column mask scattered into the ghosted layout, with the
   right-hand side b adjusted for known solution values in x.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 == this local row not requested */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build a global 0/1 mask of zeroed rows and scatter it to the ghosted layout */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost the solution values so the rhs can be corrected below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; /* scalar entries in this point row */
    /* consecutive entries of a point row are bs apart in the block storage */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* (l->B is sequential, so its rmap->N is the local point-row count) */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs; /* block row of point row i */
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k; /* local (ghosted) point column */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col]; /* move the known value to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1898 
1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1900 {
1901   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1910 
1911 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1912 {
1913   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1914   Mat            a,b,c,d;
1915   PetscBool      flg;
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   a = matA->A; b = matA->B;
1920   c = matB->A; d = matB->B;
1921 
1922   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1923   if (flg) {
1924     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1925   }
1926   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1931 {
1932   PetscErrorCode ierr;
1933   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1934   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1935 
1936   PetscFunctionBegin;
1937   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1938   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1939     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1940   } else {
1941     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1942     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1943   }
1944   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1949 {
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1958 {
1959   PetscErrorCode ierr;
1960   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1961   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1962   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1963 
1964   PetscFunctionBegin;
1965   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1966   PetscFunctionReturn(0);
1967 }
1968 
/*
   MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

   - SAME_NONZERO_PATTERN: the stored values of X and Y align entry for
     entry, so one BLAS axpy per local block suffices.
   - SUBSET_NONZERO_PATTERN: defer to the generic implementation.
   - otherwise: build a new matrix preallocated for the union pattern, do the
     addition there, and replace Y's header in place.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    /* nz counts blocks, so the scalar vector length is nz*bs2 */
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    /* NOTE(review): sized by point rows though preallocation needs only
       block-row counts -- over-allocation appears harmless; confirm */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2012 
2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2020   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2025 {
2026   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2031   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   IS             iscol_local;
2039   PetscInt       csize;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2043   if (call == MAT_REUSE_MATRIX) {
2044     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2045     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2046   } else {
2047     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2048   }
2049   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2050   if (call == MAT_INITIAL_MATRIX) {
2051     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2052     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
2056 
/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ
  locally, and then the final result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
/*
   MatCreateSubMatrix_MPIBAIJ_Private - Extracts mat[isrow,iscol] in two
   stages: each process first gathers the rows it needs into a sequential
   matrix Mreuse (stashed on the result under key "SubMatrix" for reuse),
   then the rows are inserted into the final parallel matrix M whose local
   column count is csize (PETSC_DECIDE lets this routine choose).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert the point-index sets to block-index sets */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert point counts to block counts */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* both preallocations are attempted; only the one matching M's type takes effect
       (this routine serves BAIJ and SBAIJ) */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is in point rows while m was converted to block rows
       above; this check appears to hold only for bs == 1 -- confirm units */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i; /* global block row */
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;        /* block column indices for this row */
    vwork = aa;     aa += nz*bs*bs;  /* the nz blocks of bs*bs values */
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2187 
2188 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2189 {
2190   MPI_Comm       comm,pcomm;
2191   PetscInt       clocal_size,nrows;
2192   const PetscInt *rows;
2193   PetscMPIInt    size;
2194   IS             crowp,lcolp;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2199   /* make a collective version of 'rowp' */
2200   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2201   if (pcomm==comm) {
2202     crowp = rowp;
2203   } else {
2204     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2205     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2206     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2207     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2208   }
2209   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2210   /* make a local version of 'colp' */
2211   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2212   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2213   if (size==1) {
2214     lcolp = colp;
2215   } else {
2216     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2217   }
2218   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2219   /* now we just get the submatrix */
2220   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2221   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2222   /* clean up */
2223   if (pcomm!=comm) {
2224     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2225   }
2226   if (size>1) {
2227     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2233 {
2234   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2235   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2236 
2237   PetscFunctionBegin;
2238   if (nghosts) *nghosts = B->nbs;
2239   if (ghosts) *ghosts = baij->garray;
2240   PetscFunctionReturn(0);
2241 }
2242 
/*
  MatGetSeqNonzeroStructure_MPIBAIJ - Creates, on every process, a sequential AIJ
  matrix that holds the nonzero *block* structure of the whole parallel matrix A
  (one entry per bs x bs block; numerical values are not filled in).

  Collective: the per-row block counts and the column indices of every process
  are gathered onto all processes with MPI_Allgatherv.

  Output: newmat - new MATSEQAIJ matrix of (block) size (N/bs) x (N/bs) on PETSC_COMM_SELF
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[] will hold, for every global block row, the number of nonzero blocks
     (diagonal part ad plus off-diagonal part bd); only the locally owned entries
     are filled here, the rest arrive via the Allgatherv below */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* one allocation split into counts (first half) and displacements (second half) */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* build this rank's rows directly in B's column-index array (no extra buffer) */
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  /* merge the off-diagonal (bd, compressed/global via garray) and diagonal (ad,
     local offset by rstart) parts so global columns come out in ascending order */
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recvcounts/displs are now re-used in units of nonzero blocks rather than rows */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information so downstream algorithms can exploit it */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2361 
/*
  MatSOR_MPIBAIJ - SOR/relaxation for MPIBAIJ matrices.

  Only SOR_APPLY_UPPER and the SOR_LOCAL_* variants are supported: each process
  relaxes its own diagonal block (mat->A) while the coupling to off-process
  unknowns (mat->B) is folded into a modified right-hand side bb1 = bb - B*x
  once per outer iteration (block Jacobi across processes, SOR within each).

  its  - number of outer (parallel) iterations
  lits - number of local sweeps passed to the sequential SOR
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless exactly one iteration with a zero initial guess is done
     (then B*x is zero and bb can be used directly); ~flag & bit tests bit absence */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first iteration needs no rhs correction */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather the off-process entries of x needed by mat->B */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  /* VecDestroy is a no-op when bb1 was never created (still 0) */
  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2432 
2433 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2434 {
2435   PetscErrorCode ierr;
2436   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2437   PetscInt       N,i,*garray = aij->garray;
2438   PetscInt       ib,jb,bs = A->rmap->bs;
2439   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2440   MatScalar      *a_val = a_aij->a;
2441   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2442   MatScalar      *b_val = b_aij->a;
2443   PetscReal      *work;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2447   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2448   if (type == NORM_2) {
2449     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2450       for (jb=0; jb<bs; jb++) {
2451         for (ib=0; ib<bs; ib++) {
2452           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2453           a_val++;
2454         }
2455       }
2456     }
2457     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2458       for (jb=0; jb<bs; jb++) {
2459         for (ib=0; ib<bs; ib++) {
2460           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2461           b_val++;
2462         }
2463       }
2464     }
2465   } else if (type == NORM_1) {
2466     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2467       for (jb=0; jb<bs; jb++) {
2468         for (ib=0; ib<bs; ib++) {
2469           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2470           a_val++;
2471         }
2472       }
2473     }
2474     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2475       for (jb=0; jb<bs; jb++) {
2476        for (ib=0; ib<bs; ib++) {
2477           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2478           b_val++;
2479         }
2480       }
2481     }
2482   } else if (type == NORM_INFINITY) {
2483     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2484       for (jb=0; jb<bs; jb++) {
2485         for (ib=0; ib<bs; ib++) {
2486           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2487           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2488           a_val++;
2489         }
2490       }
2491     }
2492     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2493       for (jb=0; jb<bs; jb++) {
2494         for (ib=0; ib<bs; ib++) {
2495           int col = garray[b_aij->j[i]] * bs + jb;
2496           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2497           b_val++;
2498         }
2499       }
2500     }
2501   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2502   if (type == NORM_INFINITY) {
2503     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2504   } else {
2505     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2506   }
2507   ierr = PetscFree(work);CHKERRQ(ierr);
2508   if (type == NORM_2) {
2509     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2510   }
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2515 {
2516   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2521   A->factorerrortype             = a->A->factorerrortype;
2522   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2523   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2528 {
2529   PetscErrorCode ierr;
2530   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2531   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2532 
2533   PetscFunctionBegin;
2534   if (!Y->preallocated) {
2535     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2536   } else if (!aij->nz) {
2537     PetscInt nonew = aij->nonew;
2538     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2539     aij->nonew = nonew;
2540   }
2541   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2546 {
2547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2552   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2553   if (d) {
2554     PetscInt rstart;
2555     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2556     *d += rstart/A->rmap->bs;
2557 
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2563 {
2564   PetscFunctionBegin;
2565   *a = ((Mat_MPIBAIJ*)A->data)->A;
2566   PetscFunctionReturn(0);
2567 }
2568 
/* -------------------------------------------------------------------*/
/* Operation table for the MPIBAIJ matrix type: entry k is installed in slot k
   of struct _MatOps (the commented numbers mark every fifth slot).  A 0 entry
   means the operation is not implemented for this type.  Positions are
   load-bearing -- do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2716 
2717 
2718 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2719 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2720 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates B and inserts values from a
  block-CSR description of the locally owned block rows.

  ii - block-row offsets into jj (length m+1 in block rows, ii[0] must be 0)
  jj - global block-column indices for each local block row (sorted within a row)
  V  - optional values, bs*bs entries per block; if NULL the blocks are set to zero

  The ordering of the bs*bs entries within each block follows the matrix's
  row-oriented flag (see the MatMPIBAIJSetPreallocationCSR man page).
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* re-read bs from the layout in case a block size was already fixed on the map */
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all the sizes below are in units of blocks */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  /* count, per local block row, the blocks in the diagonal range [cstart,cend)
     and the rest (off-diagonal), so preallocation is exact */
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros; one scratch row of maximal length suffices */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* the structure is now fixed: inserting at a new nonzero location becomes an error */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2787 
2788 /*@C
2789    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (block compressed row format).
2791 
2792    Collective on MPI_Comm
2793 
2794    Input Parameters:
2795 +  B - the matrix
2796 .  bs - the block size
2797 .  i - the indices into j for the start of each local row (starts with zero)
2798 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2799 -  v - optional values in the matrix
2800 
2801    Level: developer
2802 
2803    Notes:
2804     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2805    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2806    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2807    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2808    block column and the second index is over columns within a block.
2809 
2810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2811 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation registered under this name;
     PetscTryMethod is a no-op for types that do not provide one */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2823 
/*
  MatMPIBAIJSetPreallocation_MPIBAIJ - Type-specific back end of
  MatMPIBAIJSetPreallocation(): fixes the block size, records block-scaled layout
  data, and (re)creates the sequential diagonal part b->A and off-diagonal part
  b->B with the given nonzero-block estimates d_nz/d_nnz and o_nz/o_nnz.
*/
PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* re-read bs from the layout in case a block size was already fixed on the map */
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  /* reject negative per-(block)row estimates */
  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache the block-scaled sizes and ownership ranges */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  /* discard any column-mapping/communication state from a previous assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* a uniprocessor run has no off-diagonal part, so it gets zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal part (and the block stash) is created only on the first call */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2895 
2896 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2897 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2898 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj (adjacency) matrix from the block
  structure of B: one vertex per block row, one entry per nonzero off-diagonal
  block (diagonal blocks are dropped since MPIAdj stores no self-edges), with
  columns emitted in ascending global block order.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count the entries of each block row (diagonal + off-diagonal parts) */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* fill jj in ascending global block order: off-diagonal columns that lie before
     the owned range, then the diagonal part (skipping the diagonal entry itself),
     then the remaining off-diagonal columns */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* ii and jj are not freed here; MatCreateMPIAdj is expected to take ownership */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2938 
2939 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2940 
2941 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2942 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to MPIAIJ by
  converting its sequential diagonal (a->A) and off-diagonal (a->B) parts with
  MatConvert_SeqBAIJ_SeqAIJ and installing them in the MPIAIJ container.
  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell with the same layout; its sequential
       parts are replaced below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* disassemble A so its off-diagonal part is in global column numbering
       before converting; A is put back together by the assembly calls below */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's contents with B's and destroy the old A internals */
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2987 
2988 /*MC
2989    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2990 
2991    Options Database Keys:
2992 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2993 . -mat_block_size <bs> - set the blocksize used to store the matrix
2994 - -mat_use_hash_table <fact>
2995 
2996   Level: beginner
2997 
2998 .seealso: MatCreateMPIBAIJ
2999 M*/
3000 
3001 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3002 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3003 
/*
   MatCreate_MPIBAIJ - Constructor invoked by MatSetType(B,MATMPIBAIJ): allocates
   the Mat_MPIBAIJ implementation data, installs the function table, composes the
   type-specific methods, and processes the hash-table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MPIBAIJ virtual function table */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* compose the conversion routines and type-specific methods looked up by name elsewhere */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* NOTE(review): the help title says "loading" but this runs at matrix creation;
     looks like a copy-paste from MatLoad_MPIBAIJ — confirm before changing the
     user-visible string */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 would make the table too small; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3086 
3087 /*MC
3088    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3089 
3090    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3091    and MATMPIBAIJ otherwise.
3092 
3093    Options Database Keys:
3094 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3095 
3096   Level: beginner
3097 
3098 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3099 M*/
3100 
3101 /*@C
3102    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3103    (block compressed row).  For good matrix assembly performance
3104    the user should preallocate the matrix storage by setting the parameters
3105    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3106    performance can be increased by more than a factor of 50.
3107 
3108    Collective on Mat
3109 
3110    Input Parameters:
3111 +  B - the matrix
3112 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3113           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3114 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3115            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3120 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3121            submatrix (same for all local rows).
3122 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3123            off-diagonal portion of the local submatrix (possibly different for
3124            each block row) or NULL.
3125 
3126    If the *_nnz parameter is given then the *_nz parameter is ignored
3127 
3128    Options Database Keys:
3129 +   -mat_block_size - size of the blocks to use
3130 -   -mat_use_hash_table <fact>
3131 
3132    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3135 
3136    Storage Information:
3137    For a square global matrix we define each processor's diagonal portion
3138    to be its local rows and the corresponding columns (a square submatrix);
3139    each processor's off-diagonal portion encompasses the remainder of the
3140    local matrix (a rectangular submatrix).
3141 
3142    The user can specify preallocated storage for the diagonal part of
3143    the local submatrix with either d_nz or d_nnz (not both).  Set
3144    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3145    memory allocation.  Likewise, specify preallocated storage for the
3146    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3147 
3148    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3149    the figure below we depict these three local rows and all columns (0-11).
3150 
3151 .vb
3152            0 1 2 3 4 5 6 7 8 9 10 11
3153           --------------------------
3154    row 3  |o o o d d d o o o o  o  o
3155    row 4  |o o o d d d o o o o  o  o
3156    row 5  |o o o d d d o o o o  o  o
3157           --------------------------
3158 .ve
3159 
3160    Thus, any entries in the d locations are stored in the d (diagonal)
3161    submatrix, and any entries in the o locations are stored in the
3162    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3163    stored simply in the MATSEQBAIJ format for compressed row storage.
3164 
3165    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3166    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3167    In general, for PDE problems in which most nonzeros are near the diagonal,
3168    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3169    or you will get TERRIBLE performance; see the users' manual chapter on
3170    matrices.
3171 
3172    You can call MatGetInfo() to get information on how effective the preallocation was;
3173    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3174    You can also run with the option -info and look for messages with the string
3175    malloc in them to see if additional memory allocation was needed.
3176 
3177    Level: intermediate
3178 
3179 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3180 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the implementation composed on the matrix type
     (MatMPIBAIJSetPreallocation_MPIBAIJ for MATMPIBAIJ); PetscTryMethod is a
     silent no-op for types that do not compose the method */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3192 
3193 /*@C
3194    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3195    (block compressed row).  For good matrix assembly performance
3196    the user should preallocate the matrix storage by setting the parameters
3197    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3198    performance can be increased by more than a factor of 50.
3199 
3200    Collective on MPI_Comm
3201 
3202    Input Parameters:
3203 +  comm - MPI communicator
3204 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3205           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3206 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3207            This value should be the same as the local size used in creating the
3208            y vector for the matrix-vector product y = Ax.
3209 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3210            This value should be the same as the local size used in creating the
3211            x vector for the matrix-vector product y = Ax.
3212 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3213 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3214 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3215            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3220 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3221            submatrix (same for all local rows).
3222 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3223            off-diagonal portion of the local submatrix (possibly different for
3224            each block row) or NULL.
3225 
3226    Output Parameter:
3227 .  A - the matrix
3228 
3229    Options Database Keys:
3230 +   -mat_block_size - size of the blocks to use
3231 -   -mat_use_hash_table <fact>
3232 
3233    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3235    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3236 
3237    Notes:
3238    If the *_nnz parameter is given then the *_nz parameter is ignored
3239 
3240    A nonzero block is any block that as 1 or more nonzeros in it
3241 
3242    The user MUST specify either the local or global matrix dimensions
3243    (possibly both).
3244 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3247 
3248    Storage Information:
3249    For a square global matrix we define each processor's diagonal portion
3250    to be its local rows and the corresponding columns (a square submatrix);
3251    each processor's off-diagonal portion encompasses the remainder of the
3252    local matrix (a rectangular submatrix).
3253 
3254    The user can specify preallocated storage for the diagonal part of
3255    the local submatrix with either d_nz or d_nnz (not both).  Set
3256    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3257    memory allocation.  Likewise, specify preallocated storage for the
3258    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3259 
3260    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3261    the figure below we depict these three local rows and all columns (0-11).
3262 
3263 .vb
3264            0 1 2 3 4 5 6 7 8 9 10 11
3265           --------------------------
3266    row 3  |o o o d d d o o o o  o  o
3267    row 4  |o o o d d d o o o o  o  o
3268    row 5  |o o o d d d o o o o  o  o
3269           --------------------------
3270 .ve
3271 
3272    Thus, any entries in the d locations are stored in the d (diagonal)
3273    submatrix, and any entries in the o locations are stored in the
3274    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3275    stored simply in the MATSEQBAIJ format for compressed row storage.
3276 
3277    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3278    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3279    In general, for PDE problems in which most nonzeros are near the diagonal,
3280    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3281    or you will get TERRIBLE performance; see the users' manual chapter on
3282    matrices.
3283 
3284    Level: intermediate
3285 
3286 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3287 @*/
3288 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3289 {
3290   PetscErrorCode ierr;
3291   PetscMPIInt    size;
3292 
3293   PetscFunctionBegin;
3294   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3295   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3296   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3297   if (size > 1) {
3298     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3299     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3300   } else {
3301     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3302     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3303   }
3304   PetscFunctionReturn(0);
3305 }
3306 
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix: copies layout, block
   bookkeeping, communication structures, and (per cpvalues) the values of the
   sequential diagonal and off-diagonal pieces.

   Input Parameters:
+  matin    - the matrix to duplicate (must be assembled; lvec/Mvctx are copied)
-  cpvalues - MAT_COPY_VALUES, MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN,
              forwarded to MatDuplicate() on the A and B pieces

   Output Parameter:
.  newmat - the duplicate
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* mark the duplicate assembled/preallocated up front: every piece is copied
     directly below instead of being built via MatSetValues() */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (local/global block counts) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than copying them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state (MatGetRow buffers, block assembly buffer)
     starts empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* copy block-row ownership ranges */
  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the local-to-global column map of the off-diagonal piece (length =
     number of nonzero block columns in oldmat->B) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh stash, but duplicate the mat-vec communication machinery */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal and off-diagonal pieces */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3389 
3390 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3391 {
3392   PetscErrorCode ierr;
3393   int            fd;
3394   PetscInt       i,nz,j,rstart,rend;
3395   PetscScalar    *vals,*buf;
3396   MPI_Comm       comm;
3397   MPI_Status     status;
3398   PetscMPIInt    rank,size,maxnz;
3399   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3400   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3401   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3402   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3403   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3404   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3405   PetscBool      isbinary;
3406 
3407   PetscFunctionBegin;
3408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3409   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3410 
3411   /* force binary viewer to load .info file if it has not yet done so */
3412   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3413   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3414   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3415   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3416   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3417   if (bs < 0) bs = 1;
3418 
3419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3420   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3421   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3422   if (!rank) {
3423     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3424     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3425     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3426   }
3427   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3428   M    = header[1]; N = header[2];
3429 
3430   /* If global sizes are set, check if they are consistent with that given in the file */
3431   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3432   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3433 
3434   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3435 
3436   /*
3437      This code adds extra rows to make sure the number of rows is
3438      divisible by the blocksize
3439   */
3440   Mbs        = M/bs;
3441   extra_rows = bs - M + bs*Mbs;
3442   if (extra_rows == bs) extra_rows = 0;
3443   else                  Mbs++;
3444   if (extra_rows && !rank) {
3445     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3446   }
3447 
3448   /* determine ownership of all rows */
3449   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3450     mbs = Mbs/size + ((Mbs % size) > rank);
3451     m   = mbs*bs;
3452   } else { /* User set */
3453     m   = newmat->rmap->n;
3454     mbs = m/bs;
3455   }
3456   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3457   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3458 
3459   /* process 0 needs enough room for process with most rows */
3460   if (!rank) {
3461     mmax = rowners[1];
3462     for (i=2; i<=size; i++) {
3463       mmax = PetscMax(mmax,rowners[i]);
3464     }
3465     mmax*=bs;
3466   } else mmax = -1;             /* unused, but compiler warns anyway */
3467 
3468   rowners[0] = 0;
3469   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3470   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3471   rstart = rowners[rank];
3472   rend   = rowners[rank+1];
3473 
3474   /* distribute row lengths to all processors */
3475   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3476   if (!rank) {
3477     mend = m;
3478     if (size == 1) mend = mend - extra_rows;
3479     ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3480     for (j=mend; j<m; j++) locrowlens[j] = 1;
3481     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3482     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3483     for (j=0; j<m; j++) {
3484       procsnz[0] += locrowlens[j];
3485     }
3486     for (i=1; i<size; i++) {
3487       mend = browners[i+1] - browners[i];
3488       if (i == size-1) mend = mend - extra_rows;
3489       ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr);
3490       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3491       /* calculate the number of nonzeros on each processor */
3492       for (j=0; j<browners[i+1]-browners[i]; j++) {
3493         procsnz[i] += rowlengths[j];
3494       }
3495       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3496     }
3497     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3498   } else {
3499     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3500   }
3501 
3502   if (!rank) {
3503     /* determine max buffer needed and allocate it */
3504     maxnz = procsnz[0];
3505     for (i=1; i<size; i++) {
3506       maxnz = PetscMax(maxnz,procsnz[i]);
3507     }
3508     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3509 
3510     /* read in my part of the matrix column indices  */
3511     nz     = procsnz[0];
3512     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3513     mycols = ibuf;
3514     if (size == 1) nz -= extra_rows;
3515     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3516     if (size == 1) {
3517       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3518     }
3519 
3520     /* read in every ones (except the last) and ship off */
3521     for (i=1; i<size-1; i++) {
3522       nz   = procsnz[i];
3523       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3524       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3525     }
3526     /* read in the stuff for the last proc */
3527     if (size != 1) {
3528       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3529       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3530       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3531       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3532     }
3533     ierr = PetscFree(cols);CHKERRQ(ierr);
3534   } else {
3535     /* determine buffer space needed for message */
3536     nz = 0;
3537     for (i=0; i<m; i++) {
3538       nz += locrowlens[i];
3539     }
3540     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3541     mycols = ibuf;
3542     /* receive message of column indices*/
3543     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3544     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3545     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3546   }
3547 
3548   /* loop over local rows, determining number of off diagonal entries */
3549   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3550   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3551   rowcount = 0; nzcount = 0;
3552   for (i=0; i<mbs; i++) {
3553     dcount  = 0;
3554     odcount = 0;
3555     for (j=0; j<bs; j++) {
3556       kmax = locrowlens[rowcount];
3557       for (k=0; k<kmax; k++) {
3558         tmp = mycols[nzcount++]/bs;
3559         if (!mask[tmp]) {
3560           mask[tmp] = 1;
3561           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3562           else masked1[dcount++] = tmp;
3563         }
3564       }
3565       rowcount++;
3566     }
3567 
3568     dlens[i]  = dcount;
3569     odlens[i] = odcount;
3570 
3571     /* zero out the mask elements we set */
3572     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3573     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3574   }
3575 
3576   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3577   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3578 
3579   if (!rank) {
3580     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3581     /* read in my part of the matrix numerical values  */
3582     nz     = procsnz[0];
3583     vals   = buf;
3584     mycols = ibuf;
3585     if (size == 1) nz -= extra_rows;
3586     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3587     if (size == 1) {
3588       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3589     }
3590 
3591     /* insert into matrix */
3592     jj = rstart*bs;
3593     for (i=0; i<m; i++) {
3594       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3595       mycols += locrowlens[i];
3596       vals   += locrowlens[i];
3597       jj++;
3598     }
3599     /* read in other processors (except the last one) and ship out */
3600     for (i=1; i<size-1; i++) {
3601       nz   = procsnz[i];
3602       vals = buf;
3603       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3604       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3605     }
3606     /* the last proc */
3607     if (size != 1) {
3608       nz   = procsnz[i] - extra_rows;
3609       vals = buf;
3610       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3611       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3612       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3613     }
3614     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3615   } else {
3616     /* receive numeric values */
3617     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3618 
3619     /* receive message of values*/
3620     vals   = buf;
3621     mycols = ibuf;
3622     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3623 
3624     /* insert into matrix */
3625     jj = rstart*bs;
3626     for (i=0; i<m; i++) {
3627       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3628       mycols += locrowlens[i];
3629       vals   += locrowlens[i];
3630       jj++;
3631     }
3632   }
3633   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3634   ierr = PetscFree(buf);CHKERRQ(ierr);
3635   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3636   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3637   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3638   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3639   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641   PetscFunctionReturn(0);
3642 }
3643 
3644 /*@
3645    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3646 
3647    Input Parameters:
+  mat  - the matrix
-  fact - factor
3650 
3651    Not Collective, each process can use a different factor
3652 
3653    Level: advanced
3654 
3655   Notes:
3656    This can also be set by the command line option: -mat_use_hash_table <fact>
3657 
3658 .seealso: MatSetOption()
3659 @*/
3660 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3661 {
3662   PetscErrorCode ierr;
3663 
3664   PetscFunctionBegin;
3665   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3666   PetscFunctionReturn(0);
3667 }
3668 
3669 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3670 {
3671   Mat_MPIBAIJ *baij;
3672 
3673   PetscFunctionBegin;
3674   baij          = (Mat_MPIBAIJ*)mat->data;
3675   baij->ht_fact = fact;
3676   PetscFunctionReturn(0);
3677 }
3678 
3679 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3680 {
3681   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3682   PetscBool      flg;
3683   PetscErrorCode ierr;
3684 
3685   PetscFunctionBegin;
3686   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3687   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3688   if (Ad)     *Ad     = a->A;
3689   if (Ao)     *Ao     = a->B;
3690   if (colmap) *colmap = a->garray;
3691   PetscFunctionReturn(0);
3692 }
3693 
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
3697 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3698 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3699 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3700 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3701 #endif
3702 
3703 /*@C
3704   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3705 
3706   Collective on Mat
3707 
3708   Input Parameters:
3709 + mat - the matrix
3710 . min - number of input rows
3711 . im - input rows
3712 . nin - number of input columns
3713 . in - input columns
3714 . v - numerical values input
3715 - addvin - INSERT_VALUES or ADD_VALUES
3716 
3717   Notes:
3718     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3719 
3720   Level: advanced
3721 
3722 .seealso:   MatSetValuesBlocked()
3723 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;      /* bs x bs scratch block, cached on the matrix */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  /* inserting into an assembled matrix triggers possible later disassembly of the off-diagonal part */
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the bs2 scratch block used to repack one block of v */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) from the end of one block row/column of v to the start of the next */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block-row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* (barray is pointed directly into v; otherwise the block is gathered into the scratch buffer) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal portion */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal portion: translate the global block column through the colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;  /* colmap stores 1-based scalar columns; 0 means "not present" */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not yet in B: fall back to global numbering after disassembling */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row belongs to another process: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3841 
3842 /*@
3843      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3844          CSR format the local rows.
3845 
3846    Collective on MPI_Comm
3847 
3848    Input Parameters:
3849 +  comm - MPI communicator
3850 .  bs - the block size, only a block size of 1 is supported
3851 .  m - number of local rows (Cannot be PETSC_DECIDE)
3852 .  n - This value should be the same as the local size used in creating the
3853        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3854        calculated if N is given) For square matrices n is almost always m.
3855 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3856 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3857 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3858 .   j - column indices
3859 -   a - matrix values
3860 
3861    Output Parameter:
3862 .   mat - the matrix
3863 
3864    Level: intermediate
3865 
3866    Notes:
3867        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3868      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3869      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3870 
3871      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3872      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3873      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3874      with column-major ordering within blocks.
3875 
3876        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3877 
3878 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3879           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3880 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* row pointers must be 0-based, and the local row count must be explicit (ownership cannot be deduced here) */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  /* the values in a[] are column-major within each block, so switch to column-oriented
     insertion for the CSR ingest, then restore the default row orientation */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3898 
3899 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3900 {
3901   PetscErrorCode ierr;
3902   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3903   PetscInt       *indx;
3904   PetscScalar    *values;
3905 
3906   PetscFunctionBegin;
3907   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3908   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3909     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3910     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3911     PetscInt       *bindx,rmax=a->rmax,j;
3912     PetscMPIInt    rank,size;
3913 
3914     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3915     mbs = m/bs; Nbs = N/cbs;
3916     if (n == PETSC_DECIDE) {
3917       nbs  = n;
3918       ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
3919       n    = nbs*cbs;
3920     } else {
3921       nbs = n/cbs;
3922     }
3923 
3924     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3925     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3926 
3927     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3928     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3929     if (rank == size-1) {
3930       /* Check sum(nbs) = Nbs */
3931       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3932     }
3933 
3934     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3935     for (i=0; i<mbs; i++) {
3936       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3937       nnz = nnz/bs;
3938       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3939       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3940       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3941     }
3942     ierr = PetscFree(bindx);CHKERRQ(ierr);
3943 
3944     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3945     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3946     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3947     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3948     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3949     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3950     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3951   }
3952 
3953   /* numeric phase */
3954   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3955   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3956 
3957   for (i=0; i<m; i++) {
3958     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3959     Ii   = i + rstart;
3960     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3961     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3962   }
3963   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967