xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b60d1aa9dffe7f0839cca942e8a5760aa5de1720)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
9 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
10 {
11   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
12   PetscErrorCode ierr;
13   PetscInt       i,*idxb = 0;
14   PetscScalar    *va,*vb;
15   Vec            vtmp;
16 
17   PetscFunctionBegin;
18   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
19   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
20   if (idx) {
21     for (i=0; i<A->rmap->n; i++) {
22       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
23     }
24   }
25 
26   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
27   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
28   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
29   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
30 
31   for (i=0; i<A->rmap->n; i++) {
32     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
33       va[i] = vb[i];
34       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
35     }
36   }
37 
38   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
39   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
40   ierr = PetscFree(idxb);CHKERRQ(ierr);
41   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
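/*
   A minimal caller-side sketch of the public interface implemented above (assuming
   MatCreateVecs() is available; any Vec sharing the row layout of A works equally well).
   The optional idx[] argument receives the global column index of each row's largest entry:

     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);      -- a vector with the row layout of A
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);      -- pass NULL instead of loc if indices are not needed
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/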
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
60 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
61 {
62   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
67   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 
71 /*
72      Local utility routine that creates a mapping from the global column
73    number to the local number in the off-diagonal part of the local
74    storage of the matrix.  This is done in a non-scalable way since the
75    length of colmap equals the number of block columns in the global matrix.
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
79 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
80 {
81   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
82   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
83   PetscErrorCode ierr;
84   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
85 
86   PetscFunctionBegin;
87 #if defined(PETSC_USE_CTABLE)
88   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
89   for (i=0; i<nbs; i++) {
90     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
91   }
92 #else
93   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
95   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
96   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
97 #endif
98   PetscFunctionReturn(0);
99 }
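/*
   Sketch of how the colmap built above is consulted when translating a global point column
   index gcol that is not owned locally (this mirrors the lookup in MatSetValues_MPIBAIJ() below):

     lcol = baij->colmap[gcol/bs] - 1;      -- start of the corresponding local block column in B, or -1 if absent
     if (lcol >= 0) lcol += gcol % bs;      -- point column within the off-diagonal matrix B

   The PETSC_USE_CTABLE branch performs the same lookup through PetscTableFind(), with keys and
   values shifted by one so that zero can mark "not present".
*/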
100 
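/*
   The two macros below insert one scalar into the local SeqBAIJ storage of either the
   diagonal block matrix A or the off-diagonal block matrix B.  The target block row is
   narrowed by bisection (the while loop) until at most three candidate block columns
   remain, the short tail is scanned linearly, and a miss is then either ignored
   (nonew == 1), flagged as an error (nonew == -1), or handled by reallocating and
   shifting the row via MatSeqXAIJReallocateAIJ().
*/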
101 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
102   { \
103  \
104     brow = row/bs;  \
105     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
106     rmax = aimax[brow]; nrow = ailen[brow]; \
107     bcol = col/bs; \
108     ridx = row % bs; cidx = col % bs; \
109     low  = 0; high = nrow; \
110     while (high-low > 3) { \
111       t = (low+high)/2; \
112       if (rp[t] > bcol) high = t; \
113       else              low  = t; \
114     } \
115     for (_i=low; _i<high; _i++) { \
116       if (rp[_i] > bcol) break; \
117       if (rp[_i] == bcol) { \
118         bap = ap +  bs2*_i + bs*cidx + ridx; \
119         if (addv == ADD_VALUES) *bap += value;  \
120         else                    *bap  = value;  \
121         goto a_noinsert; \
122       } \
123     } \
124     if (a->nonew == 1) goto a_noinsert; \
125     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
126     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
127     N = nrow++ - 1;  \
128     /* shift up all the later entries in this row */ \
129     for (ii=N; ii>=_i; ii--) { \
130       rp[ii+1] = rp[ii]; \
131       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
132     } \
133     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
134     rp[_i]                      = bcol;  \
135     ap[bs2*_i + bs*cidx + ridx] = value;  \
136 a_noinsert:; \
137     ailen[brow] = nrow; \
138   }
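/* Identical to the A variant above, except that it targets the off-diagonal block matrix B
   (arrays bi,bj,ba,bimax,bilen and the b_noinsert label). */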
139 
140 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
141   { \
142     brow = row/bs;  \
143     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
144     rmax = bimax[brow]; nrow = bilen[brow]; \
145     bcol = col/bs; \
146     ridx = row % bs; cidx = col % bs; \
147     low  = 0; high = nrow; \
148     while (high-low > 3) { \
149       t = (low+high)/2; \
150       if (rp[t] > bcol) high = t; \
151       else              low  = t; \
152     } \
153     for (_i=low; _i<high; _i++) { \
154       if (rp[_i] > bcol) break; \
155       if (rp[_i] == bcol) { \
156         bap = ap +  bs2*_i + bs*cidx + ridx; \
157         if (addv == ADD_VALUES) *bap += value;  \
158         else                    *bap  = value;  \
159         goto b_noinsert; \
160       } \
161     } \
162     if (b->nonew == 1) goto b_noinsert; \
163     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
164     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
165     N = nrow++ - 1;  \
166     /* shift up all the later entries in this row */ \
167     for (ii=N; ii>=_i; ii--) { \
168       rp[ii+1] = rp[ii]; \
169       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
170     } \
171     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
172     rp[_i]                      = bcol;  \
173     ap[bs2*_i + bs*cidx + ridx] = value;  \
174 b_noinsert:; \
175     bilen[brow] = nrow; \
176   }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatSetValues_MPIBAIJ"
180 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
181 {
182   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
183   MatScalar      value;
184   PetscBool      roworiented = baij->roworiented;
185   PetscErrorCode ierr;
186   PetscInt       i,j,row,col;
187   PetscInt       rstart_orig=mat->rmap->rstart;
188   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
189   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
190 
191   /* Some variables required by the macros below */
192   Mat         A     = baij->A;
193   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
194   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
195   MatScalar   *aa   =a->a;
196 
197   Mat         B     = baij->B;
198   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
199   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
200   MatScalar   *ba   =b->a;
201 
202   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
203   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
204   MatScalar *ap,*bap;
205 
206   PetscFunctionBegin;
207   for (i=0; i<m; i++) {
208     if (im[i] < 0) continue;
209 #if defined(PETSC_USE_DEBUG)
210     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
211 #endif
212     if (im[i] >= rstart_orig && im[i] < rend_orig) {
213       row = im[i] - rstart_orig;
214       for (j=0; j<n; j++) {
215         if (in[j] >= cstart_orig && in[j] < cend_orig) {
216           col = in[j] - cstart_orig;
217           if (roworiented) value = v[i*n+j];
218           else             value = v[i+j*m];
219           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
220           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
221         } else if (in[j] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
224 #endif
225         else {
226           if (mat->was_assembled) {
227             if (!baij->colmap) {
228               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
229             }
230 #if defined(PETSC_USE_CTABLE)
231             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
232             col  = col - 1;
233 #else
234             col = baij->colmap[in[j]/bs] - 1;
235 #endif
236             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
237               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
238               col  =  in[j];
239               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
240               B    = baij->B;
241               b    = (Mat_SeqBAIJ*)(B)->data;
242               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
243               ba   =b->a;
244             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
245             else col += in[j]%bs;
246           } else col = in[j];
247           if (roworiented) value = v[i*n+j];
248           else             value = v[i+j*m];
249           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
250           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
251         }
252       }
253     } else {
254       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
255       if (!baij->donotstash) {
256         mat->assembled = PETSC_FALSE;
257         if (roworiented) {
258           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
259         } else {
260           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
261         }
262       }
263     }
264   }
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
270 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
271 {
272   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
273   const PetscScalar *value;
274   MatScalar         *barray     = baij->barray;
275   PetscBool         roworiented = baij->roworiented;
276   PetscErrorCode    ierr;
277   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
278   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
279   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
280 
281   PetscFunctionBegin;
282   if (!barray) {
283     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
284     baij->barray = barray;
285   }
286 
287   if (roworiented) stepval = (n-1)*bs;
288   else stepval = (m-1)*bs;
289 
290   for (i=0; i<m; i++) {
291     if (im[i] < 0) continue;
292 #if defined(PETSC_USE_DEBUG)
293     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
294 #endif
295     if (im[i] >= rstart && im[i] < rend) {
296       row = im[i] - rstart;
297       for (j=0; j<n; j++) {
298         /* If (roworiented && n == 1) or (!roworiented && m == 1) the block can be used in place and no copy is required */
299         if ((roworiented) && (n == 1)) {
300           barray = (MatScalar*)v + i*bs2;
301         } else if ((!roworiented) && (m == 1)) {
302           barray = (MatScalar*)v + j*bs2;
303         } else { /* Here a copy is required */
304           if (roworiented) {
305             value = v + (i*(stepval+bs) + j)*bs;
306           } else {
307             value = v + (j*(stepval+bs) + i)*bs;
308           }
309           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
310             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
311             barray += bs;
312           }
313           barray -= bs2;
314         }
315 
316         if (in[j] >= cstart && in[j] < cend) {
317           col  = in[j] - cstart;
318           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
319         } else if (in[j] < 0) continue;
320 #if defined(PETSC_USE_DEBUG)
321         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
322 #endif
323         else {
324           if (mat->was_assembled) {
325             if (!baij->colmap) {
326               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
327             }
328 
329 #if defined(PETSC_USE_DEBUG)
330 #if defined(PETSC_USE_CTABLE)
331             { PetscInt data;
332               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
333               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
334             }
335 #else
336             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
337 #endif
338 #endif
339 #if defined(PETSC_USE_CTABLE)
340             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
341             col  = (col - 1)/bs;
342 #else
343             col = (baij->colmap[in[j]] - 1)/bs;
344 #endif
345             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
346               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
347               col  =  in[j];
348             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
349           } else col = in[j];
350           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
351         }
352       }
353     } else {
354       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
355       if (!baij->donotstash) {
356         if (roworiented) {
357           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
358         } else {
359           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
360         }
361       }
362     }
363   }
364   PetscFunctionReturn(0);
365 }
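/*
   A minimal caller-side sketch of the blocked interface implemented above (assuming bs == 2
   for concreteness; brow and bcol are BLOCK indices, not point indices, and vals holds one
   dense bs x bs block, row-oriented by default):

     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
     PetscInt    brow = 0,bcol = 0;
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,ADD_VALUES);CHKERRQ(ierr);
*/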
366 
367 #define HASH_KEY 0.6180339887
368 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
369 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
370 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
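/* Example: with size = 101 and key = 7, tmp = 7*0.6180339887 = 4.3262..., whose fractional
   part is 0.3262..., so HASH(101,7,tmp) = (PetscInt)(101*0.3262...) = 32.  Keys are spread
   over the table by the fractional part of key times the golden ratio (Fibonacci hashing). */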
371 #undef __FUNCT__
372 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
373 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
374 {
375   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
376   PetscBool      roworiented = baij->roworiented;
377   PetscErrorCode ierr;
378   PetscInt       i,j,row,col;
379   PetscInt       rstart_orig=mat->rmap->rstart;
380   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
381   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
382   PetscReal      tmp;
383   MatScalar      **HD = baij->hd,value;
384 #if defined(PETSC_USE_DEBUG)
385   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
386 #endif
387 
388   PetscFunctionBegin;
389   for (i=0; i<m; i++) {
390 #if defined(PETSC_USE_DEBUG)
391     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
392     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
393 #endif
394     row = im[i];
395     if (row >= rstart_orig && row < rend_orig) {
396       for (j=0; j<n; j++) {
397         col = in[j];
398         if (roworiented) value = v[i*n+j];
399         else             value = v[i+j*m];
400         /* Look up into the Hash Table */
401         key = (row/bs)*Nbs+(col/bs)+1;
402         h1  = HASH(size,key,tmp);
403 
404 
405         idx = h1;
406 #if defined(PETSC_USE_DEBUG)
407         insert_ct++;
408         total_ct++;
409         if (HT[idx] != key) {
410           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
411           if (idx == size) {
412             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
413             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
414           }
415         }
416 #else
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #endif
425         /* A HASH table entry is found, so insert the values at the correct address */
426         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
427         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
428       }
429     } else if (!baij->donotstash) {
430       if (roworiented) {
431         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
432       } else {
433         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
434       }
435     }
436   }
437 #if defined(PETSC_USE_DEBUG)
438   baij->ht_total_ct  = total_ct;
439   baij->ht_insert_ct = insert_ct;
440 #endif
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
446 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
447 {
448   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
449   PetscBool         roworiented = baij->roworiented;
450   PetscErrorCode    ierr;
451   PetscInt          i,j,ii,jj,row,col;
452   PetscInt          rstart=baij->rstartbs;
453   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
454   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
455   PetscReal         tmp;
456   MatScalar         **HD = baij->hd,*baij_a;
457   const PetscScalar *v_t,*value;
458 #if defined(PETSC_USE_DEBUG)
459   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
460 #endif
461 
462   PetscFunctionBegin;
463   if (roworiented) stepval = (n-1)*bs;
464   else stepval = (m-1)*bs;
465 
466   for (i=0; i<m; i++) {
467 #if defined(PETSC_USE_DEBUG)
468     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
469     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
470 #endif
471     row = im[i];
472     v_t = v + i*nbs2;
473     if (row >= rstart && row < rend) {
474       for (j=0; j<n; j++) {
475         col = in[j];
476 
477         /* Look up into the Hash Table */
478         key = row*Nbs+col+1;
479         h1  = HASH(size,key,tmp);
480 
481         idx = h1;
482 #if defined(PETSC_USE_DEBUG)
483         total_ct++;
484         insert_ct++;
485         if (HT[idx] != key) {
486           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
487           if (idx == size) {
488             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
489             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
490           }
491         }
492 #else
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #endif
501         baij_a = HD[idx];
502         if (roworiented) {
503           /*value = v + i*(stepval+bs)*bs + j*bs;*/
504           /* value = v + (i*(stepval+bs)+j)*bs; */
505           value = v_t;
506           v_t  += bs;
507           if (addv == ADD_VALUES) {
508             for (ii=0; ii<bs; ii++,value+=stepval) {
509               for (jj=ii; jj<bs2; jj+=bs) {
510                 baij_a[jj] += *value++;
511               }
512             }
513           } else {
514             for (ii=0; ii<bs; ii++,value+=stepval) {
515               for (jj=ii; jj<bs2; jj+=bs) {
516                 baij_a[jj] = *value++;
517               }
518             }
519           }
520         } else {
521           value = v + j*(stepval+bs)*bs + i*bs;
522           if (addv == ADD_VALUES) {
523             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
524               for (jj=0; jj<bs; jj++) {
525                 baij_a[jj] += *value++;
526               }
527             }
528           } else {
529             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
530               for (jj=0; jj<bs; jj++) {
531                 baij_a[jj] = *value++;
532               }
533             }
534           }
535         }
536       }
537     } else {
538       if (!baij->donotstash) {
539         if (roworiented) {
540           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
541         } else {
542           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
543         }
544       }
545     }
546   }
547 #if defined(PETSC_USE_DEBUG)
548   baij->ht_total_ct  = total_ct;
549   baij->ht_insert_ct = insert_ct;
550 #endif
551   PetscFunctionReturn(0);
552 }
553 
554 #undef __FUNCT__
555 #define __FUNCT__ "MatGetValues_MPIBAIJ"
556 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
557 {
558   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
559   PetscErrorCode ierr;
560   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
561   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
562 
563   PetscFunctionBegin;
564   for (i=0; i<m; i++) {
565     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
566     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
567     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
568       row = idxm[i] - bsrstart;
569       for (j=0; j<n; j++) {
570         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
571         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
572         if (idxn[j] >= bscstart && idxn[j] < bscend) {
573           col  = idxn[j] - bscstart;
574           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575         } else {
576           if (!baij->colmap) {
577             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
578           }
579 #if defined(PETSC_USE_CTABLE)
580           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
581           data--;
582 #else
583           data = baij->colmap[idxn[j]/bs]-1;
584 #endif
585           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
586           else {
587             col  = data + idxn[j]%bs;
588             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
593   }
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatNorm_MPIBAIJ"
599 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
600 {
601   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
602   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
603   PetscErrorCode ierr;
604   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
605   PetscReal      sum = 0.0;
606   MatScalar      *v;
607 
608   PetscFunctionBegin;
609   if (baij->size == 1) {
610     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
611   } else {
612     if (type == NORM_FROBENIUS) {
613       v  = amat->a;
614       nz = amat->nz*bs2;
615       for (i=0; i<nz; i++) {
616         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
617       }
618       v  = bmat->a;
619       nz = bmat->nz*bs2;
620       for (i=0; i<nz; i++) {
621         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
622       }
623       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
624       *nrm = PetscSqrtReal(*nrm);
625     } else if (type == NORM_1) { /* max column sum */
626       PetscReal *tmp,*tmp2;
627       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
628       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
629       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
630       v    = amat->a; jj = amat->j;
631       for (i=0; i<amat->nz; i++) {
632         for (j=0; j<bs; j++) {
633           col = bs*(cstart + *jj) + j; /* column index */
634           for (row=0; row<bs; row++) {
635             tmp[col] += PetscAbsScalar(*v);  v++;
636           }
637         }
638         jj++;
639       }
640       v = bmat->a; jj = bmat->j;
641       for (i=0; i<bmat->nz; i++) {
642         for (j=0; j<bs; j++) {
643           col = bs*garray[*jj] + j;
644           for (row=0; row<bs; row++) {
645             tmp[col] += PetscAbsScalar(*v); v++;
646           }
647         }
648         jj++;
649       }
650       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
651       *nrm = 0.0;
652       for (j=0; j<mat->cmap->N; j++) {
653         if (tmp2[j] > *nrm) *nrm = tmp2[j];
654       }
655       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
656     } else if (type == NORM_INFINITY) { /* max row sum */
657       PetscReal *sums;
658       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
659       sum  = 0.0;
660       for (j=0; j<amat->mbs; j++) {
661         for (row=0; row<bs; row++) sums[row] = 0.0;
662         v  = amat->a + bs2*amat->i[j];
663         nz = amat->i[j+1]-amat->i[j];
664         for (i=0; i<nz; i++) {
665           for (col=0; col<bs; col++) {
666             for (row=0; row<bs; row++) {
667               sums[row] += PetscAbsScalar(*v); v++;
668             }
669           }
670         }
671         v  = bmat->a + bs2*bmat->i[j];
672         nz = bmat->i[j+1]-bmat->i[j];
673         for (i=0; i<nz; i++) {
674           for (col=0; col<bs; col++) {
675             for (row=0; row<bs; row++) {
676               sums[row] += PetscAbsScalar(*v); v++;
677             }
678           }
679         }
680         for (row=0; row<bs; row++) {
681           if (sums[row] > sum) sum = sums[row];
682         }
683       }
684       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
685       ierr = PetscFree(sums);CHKERRQ(ierr);
686     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
687   }
688   PetscFunctionReturn(0);
689 }
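/*
   The norms computed above, written out (a_ij denotes a global matrix entry):
     NORM_FROBENIUS : sqrt( sum_ij |a_ij|^2 )   -- local squared sums over A and B, MPIU_SUM reduction, then sqrt
     NORM_1         : max_j sum_i |a_ij|        -- per-column absolute sums, MPIU_SUM reduction, then the maximum
     NORM_INFINITY  : max_i sum_j |a_ij|        -- per-row absolute sums taken locally, MPIU_MAX reduction
*/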
690 
691 /*
692   Creates and populates the hash table.
693   This table is created only once.
694   If new entries need to be added to the matrix
695   then the hash table has to be destroyed and
696   recreated.
697 */
698 #undef __FUNCT__
699 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
700 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
701 {
702   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
703   Mat            A     = baij->A,B=baij->B;
704   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
705   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
706   PetscErrorCode ierr;
707   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
708   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
709   PetscInt       *HT,key;
710   MatScalar      **HD;
711   PetscReal      tmp;
712 #if defined(PETSC_USE_INFO)
713   PetscInt ct=0,max=0;
714 #endif
715 
716   PetscFunctionBegin;
717   if (baij->ht) PetscFunctionReturn(0);
718 
719   baij->ht_size = (PetscInt)(factor*nz);
720   ht_size       = baij->ht_size;
721 
722   /* Allocate Memory for Hash Table */
723   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
724   HD   = baij->hd;
725   HT   = baij->ht;
726 
727   /* Loop Over A */
728   for (i=0; i<a->mbs; i++) {
729     for (j=ai[i]; j<ai[i+1]; j++) {
730       row = i+rstart;
731       col = aj[j]+cstart;
732 
733       key = row*Nbs + col + 1;
734       h1  = HASH(ht_size,key,tmp);
735       for (k=0; k<ht_size; k++) {
736         if (!HT[(h1+k)%ht_size]) {
737           HT[(h1+k)%ht_size] = key;
738           HD[(h1+k)%ht_size] = a->a + j*bs2;
739           break;
740 #if defined(PETSC_USE_INFO)
741         } else {
742           ct++;
743 #endif
744         }
745       }
746 #if defined(PETSC_USE_INFO)
747       if (k> max) max = k;
748 #endif
749     }
750   }
751   /* Loop Over B */
752   for (i=0; i<b->mbs; i++) {
753     for (j=bi[i]; j<bi[i+1]; j++) {
754       row = i+rstart;
755       col = garray[bj[j]];
756       key = row*Nbs + col + 1;
757       h1  = HASH(ht_size,key,tmp);
758       for (k=0; k<ht_size; k++) {
759         if (!HT[(h1+k)%ht_size]) {
760           HT[(h1+k)%ht_size] = key;
761           HD[(h1+k)%ht_size] = b->a + j*bs2;
762           break;
763 #if defined(PETSC_USE_INFO)
764         } else {
765           ct++;
766 #endif
767         }
768       }
769 #if defined(PETSC_USE_INFO)
770       if (k> max) max = k;
771 #endif
772     }
773   }
774 
775   /* Print Summary */
776 #if defined(PETSC_USE_INFO)
777   for (i=0,j=0; i<ht_size; i++) {
778     if (HT[i]) j++;
779   }
780   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
781 #endif
782   PetscFunctionReturn(0);
783 }
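/*
   Lookup (see MatSetValues_MPIBAIJ_HT() above) mirrors the insertion loop used here: probe
   linearly from the hashed slot, wrapping around once at the end of the table, until
   HT[idx] == key; HD[idx] then points at the bs2 scalars of that block inside a->a or b->a.
   A compressed sketch of the probe, assuming the key is known to be present:

     key = row*Nbs + col + 1;                          -- +1 so that 0 can mark an empty slot
     h1  = HASH(ht_size,key,tmp);
     for (idx=h1; HT[idx] != key; idx=(idx+1)%ht_size) ;
     blockvals = HD[idx];
*/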
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
787 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
788 {
789   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
790   PetscErrorCode ierr;
791   PetscInt       nstash,reallocs;
792   InsertMode     addv;
793 
794   PetscFunctionBegin;
795   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
796 
797   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
798   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
799   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added values");
800   mat->insertmode = addv; /* in case this processor had no cache */
801 
802   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
803   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
804   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
805   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
806   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
807   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
813 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
814 {
815   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
816   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
817   PetscErrorCode ierr;
818   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
819   PetscInt       *row,*col;
820   PetscBool      r1,r2,r3,other_disassembled;
821   MatScalar      *val;
822   InsertMode     addv = mat->insertmode;
823   PetscMPIInt    n;
824 
825   PetscFunctionBegin;
826   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
827   if (!baij->donotstash && !mat->nooffprocentries) {
828     while (1) {
829       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
830       if (!flg) break;
831 
832       for (i=0; i<n;) {
833         /* Now identify the consecutive vals belonging to the same row */
834         for (j=i,rstart=row[j]; j<n; j++) {
835           if (row[j] != rstart) break;
836         }
837         if (j < n) ncols = j-i;
838         else       ncols = n-i;
839         /* Now assemble all these values with a single function call */
840         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
841         i    = j;
842       }
843     }
844     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
845     /* Now process the block-stash. Since the values are stashed column-oriented,
846        temporarily clear the roworiented flags, and after the MatSetValuesBlocked() calls
847        restore the original flags */
848     r1 = baij->roworiented;
849     r2 = a->roworiented;
850     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
851 
852     baij->roworiented = PETSC_FALSE;
853     a->roworiented    = PETSC_FALSE;
854 
855     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
856     while (1) {
857       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
858       if (!flg) break;
859 
860       for (i=0; i<n;) {
861         /* Now identify the consecutive vals belonging to the same row */
862         for (j=i,rstart=row[j]; j<n; j++) {
863           if (row[j] != rstart) break;
864         }
865         if (j < n) ncols = j-i;
866         else       ncols = n-i;
867         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
868         i    = j;
869       }
870     }
871     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
872 
873     baij->roworiented = r1;
874     a->roworiented    = r2;
875 
876     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
877   }
878 
879   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
880   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
881 
882   /* determine if any processor has disassembled, if so we must
883      also disassemble ourselves, in order that we may reassemble. */
884   /*
885      if nonzero structure of submatrix B cannot change then we know that
886      no processor disassembled thus we can skip this stuff
887   */
888   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
889     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
890     if (mat->was_assembled && !other_disassembled) {
891       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
892     }
893   }
894 
895   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
896     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
897   }
898   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
899   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
900 
901 #if defined(PETSC_USE_INFO)
902   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
903     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
904 
905     baij->ht_total_ct  = 0;
906     baij->ht_insert_ct = 0;
907   }
908 #endif
909   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
910     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
911 
912     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
913     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
914   }
915 
916   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
917 
918   baij->rowvalues = 0;
919 
920   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
921   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
922     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
923     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924   }
925   PetscFunctionReturn(0);
926 }
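/*
   Caller-side assembly sketch: values set with MatSetValues()/MatSetValuesBlocked() are not
   usable until both assembly calls have completed, and a flush assembly is required when
   switching between ADD_VALUES and INSERT_VALUES:

     ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);   -- only needed between insert/add phases
     ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/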
927 
928 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
929 #include <petscdraw.h>
930 #undef __FUNCT__
931 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
932 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
933 {
934   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
935   PetscErrorCode    ierr;
936   PetscMPIInt       rank = baij->rank;
937   PetscInt          bs   = mat->rmap->bs;
938   PetscBool         iascii,isdraw;
939   PetscViewer       sviewer;
940   PetscViewerFormat format;
941 
942   PetscFunctionBegin;
943   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
944   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
945   if (iascii) {
946     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
947     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
948       MatInfo info;
949       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
950       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
951       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
952       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
953                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
954       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
955       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
956       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
957       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
958       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
961       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
962       PetscFunctionReturn(0);
963     } else if (format == PETSC_VIEWER_ASCII_INFO) {
964       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
965       PetscFunctionReturn(0);
966     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
967       PetscFunctionReturn(0);
968     }
969   }
970 
971   if (isdraw) {
972     PetscDraw draw;
973     PetscBool isnull;
974     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
975     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
976   }
977 
978   {
979     /* assemble the entire matrix onto first processor. */
980     Mat         A;
981     Mat_SeqBAIJ *Aloc;
982     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
983     MatScalar   *a;
984     const char  *matname;
985 
986     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
987     /* Perhaps this should be the type of mat? */
988     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
989     if (!rank) {
990       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
991     } else {
992       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
993     }
994     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
995     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
996     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
997     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
998 
999     /* copy over the A part */
1000     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1001     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1002     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1003 
1004     for (i=0; i<mbs; i++) {
1005       rvals[0] = bs*(baij->rstartbs + i);
1006       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1007       for (j=ai[i]; j<ai[i+1]; j++) {
1008         col = (baij->cstartbs+aj[j])*bs;
1009         for (k=0; k<bs; k++) {
1010           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1011           col++; a += bs;
1012         }
1013       }
1014     }
1015     /* copy over the B part */
1016     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1017     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1018     for (i=0; i<mbs; i++) {
1019       rvals[0] = bs*(baij->rstartbs + i);
1020       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1021       for (j=ai[i]; j<ai[i+1]; j++) {
1022         col = baij->garray[aj[j]]*bs;
1023         for (k=0; k<bs; k++) {
1024           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1025           col++; a += bs;
1026         }
1027       }
1028     }
1029     ierr = PetscFree(rvals);CHKERRQ(ierr);
1030     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1031     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032     /*
1033        Everyone has to make this call to draw the matrix since the graphics waits are
1034        synchronized across all processors that share the PetscDraw object
1035     */
1036     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1037     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1038     if (!rank) {
1039       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1040       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1041     }
1042     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A);CHKERRQ(ierr);
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1050 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1051 {
1052   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1053   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1054   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1055   PetscErrorCode ierr;
1056   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1057   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1058   int            fd;
1059   PetscScalar    *column_values;
1060   FILE           *file;
1061   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1062   PetscInt       message_count,flowcontrolcount;
1063 
1064   PetscFunctionBegin;
1065   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1066   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1067   nz   = bs2*(A->nz + B->nz);
1068   rlen = mat->rmap->n;
1069   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1070   if (!rank) {
1071     header[0] = MAT_FILE_CLASSID;
1072     header[1] = mat->rmap->N;
1073     header[2] = mat->cmap->N;
1074 
1075     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1076     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1077     /* get largest number of rows any processor has */
1078     range = mat->rmap->range;
1079     for (i=1; i<size; i++) {
1080       rlen = PetscMax(rlen,range[i+1] - range[i]);
1081     }
1082   } else {
1083     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1084   }
1085 
1086   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1087   /* compute lengths of each row  */
1088   for (i=0; i<a->mbs; i++) {
1089     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1090   }
1091   /* store the row lengths to the file */
1092   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1093   if (!rank) {
1094     MPI_Status status;
1095     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1096     rlen = (range[1] - range[0])/bs;
1097     for (i=0; i<rlen; i++) {
1098       for (j=0; j<bs; j++) {
1099         row_lens[i*bs+j] = bs*crow_lens[i];
1100       }
1101     }
1102     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1103     for (i=1; i<size; i++) {
1104       rlen = (range[i+1] - range[i])/bs;
1105       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1106       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1107       for (k=0; k<rlen; k++) {
1108         for (j=0; j<bs; j++) {
1109           row_lens[k*bs+j] = bs*crow_lens[k];
1110         }
1111       }
1112       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1113     }
1114     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1115     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1116   } else {
1117     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1118     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1119     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1120   }
1121   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1122 
1123   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
1124      information needed to expand a block row into its individual rows. This does require more communication but still no more than
1125      the communication needed for the nonzero values  */
1126   nzmax = nz; /* space the largest processor needs */
1127   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1128   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1129   cnt   = 0;
1130   for (i=0; i<a->mbs; i++) {
1131     pcnt = cnt;
1132     for (j=B->i[i]; j<B->i[i+1]; j++) {
1133       if ((col = garray[B->j[j]]) > cstart) break;
1134       for (l=0; l<bs; l++) {
1135         column_indices[cnt++] = bs*col+l;
1136       }
1137     }
1138     for (k=A->i[i]; k<A->i[i+1]; k++) {
1139       for (l=0; l<bs; l++) {
1140         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1141       }
1142     }
1143     for (; j<B->i[i+1]; j++) {
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1146       }
1147     }
1148     len = cnt - pcnt;
1149     for (k=1; k<bs; k++) {
1150       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1151       cnt += len;
1152     }
1153   }
1154   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1155 
1156   /* store the columns to the file */
1157   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1158   if (!rank) {
1159     MPI_Status status;
1160     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1161     for (i=1; i<size; i++) {
1162       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1163       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1164       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1165       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     }
1167     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1168   } else {
1169     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1170     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1173   }
1174   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1175 
1176   /* load up the numerical values */
1177   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1178   cnt  = 0;
1179   for (i=0; i<a->mbs; i++) {
1180     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1181     for (j=B->i[i]; j<B->i[i+1]; j++) {
1182       if (garray[B->j[j]] > cstart) break;
1183       for (l=0; l<bs; l++) {
1184         for (ll=0; ll<bs; ll++) {
1185           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1186         }
1187       }
1188       cnt += bs;
1189     }
1190     for (k=A->i[i]; k<A->i[i+1]; k++) {
1191       for (l=0; l<bs; l++) {
1192         for (ll=0; ll<bs; ll++) {
1193           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1194         }
1195       }
1196       cnt += bs;
1197     }
1198     for (; j<B->i[i+1]; j++) {
1199       for (l=0; l<bs; l++) {
1200         for (ll=0; ll<bs; ll++) {
1201           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1202         }
1203       }
1204       cnt += bs;
1205     }
1206     cnt += (bs-1)*rlen;
1207   }
1208   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1209 
1210   /* store the column values to the file */
1211   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1212   if (!rank) {
1213     MPI_Status status;
1214     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1215     for (i=1; i<size; i++) {
1216       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1217       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1218       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220     }
1221     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222   } else {
1223     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227   }
1228   ierr = PetscFree(column_values);CHKERRQ(ierr);
1229 
1230   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1231   if (file) {
1232     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatView_MPIBAIJ"
1239 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1240 {
1241   PetscErrorCode ierr;
1242   PetscBool      iascii,isdraw,issocket,isbinary;
1243 
1244   PetscFunctionBegin;
1245   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1246   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1247   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1248   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1249   if (iascii || isdraw || issocket) {
1250     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1251   } else if (isbinary) {
1252     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1259 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1260 {
1261   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1262   PetscErrorCode ierr;
1263 
1264   PetscFunctionBegin;
1265 #if defined(PETSC_USE_LOG)
1266   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1267 #endif
1268   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1269   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1270   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1271   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1272 #if defined(PETSC_USE_CTABLE)
1273   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1274 #else
1275   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1276 #endif
1277   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1278   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1279   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1280   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1281   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1282   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1283   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1284   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1285 
1286   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatMult_MPIBAIJ"
1301 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1302 {
1303   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305   PetscInt       nt;
1306 
1307   PetscFunctionBegin;
1308   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1309   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1310   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1311   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1312   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1313   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1314   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1315   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
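
/*
   Illustrative sketch (not part of this file): the usual entry point is MatMult() on an
   assembled MATMPIBAIJ matrix with vectors whose layouts conform, e.g.

      Vec x,y;
      ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);        dispatched here through A->ops->mult

   The scatter of the needed ghost values of x into a->lvec is overlapped with the multiply
   by the local diagonal block a->A; the off-diagonal contribution is added once
   VecScatterEnd() completes.
*/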
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1321 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1322 {
1323   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1329   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1336 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1337 {
1338   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1339   PetscErrorCode ierr;
1340   PetscBool      merged;
1341 
1342   PetscFunctionBegin;
1343   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1344   /* do nondiagonal part */
1345   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1346   if (!merged) {
1347     /* send it on its way */
1348     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1349     /* do local part */
1350     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1351     /* receive remote parts: note this assumes the values are not actually */
1352     /* inserted in yy until the next line */
1353     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354   } else {
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* send it on its way */
1358     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
1360     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
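
/*
   Sketch of the decomposition used above (informal): with A_d the local diagonal block and
   A_o the off-diagonal block stored on this process,

      y_local  = A_d^T x_local
      y       += reverse scatter-add of A_o^T x_local to the owning processes

   The merged and non-merged branches differ only in whether the local multiply is performed
   before or after VecScatterBegin(), so the reverse scatter can be overlapped with
   computation when the scatter is not merged.
*/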
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1367 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1368 {
1369   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   /* do nondiagonal part */
1374   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1375   /* send it on its way */
1376   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377   /* do local part */
1378   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1379   /* receive remote parts: note this assumes the values are not actually */
1380   /* inserted in zz until the next line, which is true for this implementation, */
1381   /* but is perhaps not always true. */
1382   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 /*
1387   This only works correctly for square matrices where the subblock A->A is the
1388    diagonal block
1389 */
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1392 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1393 {
1394   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1395   PetscErrorCode ierr;
1396 
1397   PetscFunctionBegin;
1398   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1399   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatScale_MPIBAIJ"
1405 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1406 {
1407   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1412   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1418 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1419 {
1420   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1421   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1422   PetscErrorCode ierr;
1423   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1424   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1425   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1426 
1427   PetscFunctionBegin;
1428   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1429   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1430   mat->getrowactive = PETSC_TRUE;
1431 
1432   if (!mat->rowvalues && (idx || v)) {
1433     /*
1434         allocate enough space to hold information from the longest row.
1435     */
1436     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1437     PetscInt    max = 1,mbs = mat->mbs,tmp;
1438     for (i=0; i<mbs; i++) {
1439       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1440       if (max < tmp) max = tmp;
1441     }
1442     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1443   }
1444   lrow = row - brstart;
1445 
1446   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447   if (!v)   {pvA = 0; pvB = 0;}
1448   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   nztot = nzA + nzB;
1452 
1453   cmap = mat->garray;
1454   if (v  || idx) {
1455     if (nztot) {
1456       /* Sort by increasing column numbers, assuming A and B already sorted */
1457       PetscInt imark = -1;
1458       if (v) {
1459         *v = v_p = mat->rowvalues;
1460         for (i=0; i<nzB; i++) {
1461           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1462           else break;
1463         }
1464         imark = i;
1465         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467       }
1468       if (idx) {
1469         *idx = idx_p = mat->rowindices;
1470         if (imark > -1) {
1471           for (i=0; i<imark; i++) {
1472             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1473           }
1474         } else {
1475           for (i=0; i<nzB; i++) {
1476             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1477             else break;
1478           }
1479           imark = i;
1480         }
1481         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1482         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1483       }
1484     } else {
1485       if (idx) *idx = 0;
1486       if (v)   *v   = 0;
1487     }
1488   }
1489   *nz  = nztot;
1490   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
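
/*
   Illustrative usage sketch (the row must be owned by this process):

      PetscInt          ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);

   Off-diagonal (B) entries are mapped back to global columns as
   cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; those whose global block column precedes cstart are
   placed before the diagonal-block entries, so the returned row is sorted by increasing
   global column.
*/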
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1497 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1503   baij->getrowactive = PETSC_FALSE;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1509 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1510 {
1511   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1512   PetscErrorCode ierr;
1513 
1514   PetscFunctionBegin;
1515   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1516   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1522 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1523 {
1524   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1525   Mat            A  = a->A,B = a->B;
1526   PetscErrorCode ierr;
1527   PetscReal      isend[5],irecv[5];
1528 
1529   PetscFunctionBegin;
1530   info->block_size = (PetscReal)matin->rmap->bs;
1531 
1532   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1533 
1534   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1535   isend[3] = info->memory;  isend[4] = info->mallocs;
1536 
1537   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1540   isend[3] += info->memory;  isend[4] += info->mallocs;
1541 
1542   if (flag == MAT_LOCAL) {
1543     info->nz_used      = isend[0];
1544     info->nz_allocated = isend[1];
1545     info->nz_unneeded  = isend[2];
1546     info->memory       = isend[3];
1547     info->mallocs      = isend[4];
1548   } else if (flag == MAT_GLOBAL_MAX) {
1549     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1550 
1551     info->nz_used      = irecv[0];
1552     info->nz_allocated = irecv[1];
1553     info->nz_unneeded  = irecv[2];
1554     info->memory       = irecv[3];
1555     info->mallocs      = irecv[4];
1556   } else if (flag == MAT_GLOBAL_SUM) {
1557     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1558 
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1566   info->fill_ratio_needed = 0;
1567   info->factor_mallocs    = 0;
1568   PetscFunctionReturn(0);
1569 }
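
/*
   Usage sketch (illustrative): the five per-process counters gathered above are reduced over
   the communicator according to the requested flag, e.g.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",(double)info.nz_used);CHKERRQ(ierr);

   MAT_LOCAL returns the sum over the diagonal and off-diagonal blocks of this process only,
   with no communication.
*/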
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1574 {
1575   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   switch (op) {
1580   case MAT_NEW_NONZERO_LOCATIONS:
1581   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1582   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583   case MAT_KEEP_NONZERO_PATTERN:
1584   case MAT_NEW_NONZERO_LOCATION_ERR:
1585     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1586     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1587     break;
1588   case MAT_ROW_ORIENTED:
1589     a->roworiented = flg;
1590 
1591     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1592     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1593     break;
1594   case MAT_NEW_DIAGONALS:
1595     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1596     break;
1597   case MAT_IGNORE_OFF_PROC_ENTRIES:
1598     a->donotstash = flg;
1599     break;
1600   case MAT_USE_HASH_TABLE:
1601     a->ht_flag = flg;
1602     break;
1603   case MAT_SYMMETRIC:
1604   case MAT_STRUCTURALLY_SYMMETRIC:
1605   case MAT_HERMITIAN:
1606   case MAT_SYMMETRY_ETERNAL:
1607     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1608     break;
1609   default:
1610     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1611   }
1612   PetscFunctionReturn(0);
1613 }
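
/*
   Illustrative sketch: options handled purely at this level only toggle flags in
   Mat_MPIBAIJ, for example

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   while the nonzero-pattern options are forwarded to both sequential blocks and the
   symmetry options to the diagonal block a->A only.
*/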
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1617 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1618 {
1619   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1620   Mat_SeqBAIJ    *Aloc;
1621   Mat            B;
1622   PetscErrorCode ierr;
1623   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1624   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1625   MatScalar      *a;
1626 
1627   PetscFunctionBegin;
1628   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1629   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1630     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1631     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1632     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1633     /* Do not know preallocation information, but must set block size */
1634     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1635   } else {
1636     B = *matout;
1637   }
1638 
1639   /* copy over the A part */
1640   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1641   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1642   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1643 
1644   for (i=0; i<mbs; i++) {
1645     rvals[0] = bs*(baij->rstartbs + i);
1646     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1647     for (j=ai[i]; j<ai[i+1]; j++) {
1648       col = (baij->cstartbs+aj[j])*bs;
1649       for (k=0; k<bs; k++) {
1650         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1651 
1652         col++; a += bs;
1653       }
1654     }
1655   }
1656   /* copy over the B part */
1657   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1658   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1659   for (i=0; i<mbs; i++) {
1660     rvals[0] = bs*(baij->rstartbs + i);
1661     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1662     for (j=ai[i]; j<ai[i+1]; j++) {
1663       col = baij->garray[aj[j]]*bs;
1664       for (k=0; k<bs; k++) {
1665         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1666         col++;
1667         a += bs;
1668       }
1669     }
1670   }
1671   ierr = PetscFree(rvals);CHKERRQ(ierr);
1672   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1674 
1675   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1676   else {
1677     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1678   }
1679   PetscFunctionReturn(0);
1680 }
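
/*
   Usage sketch (illustrative):

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       in-place, square matrices only

   The transpose is built by re-inserting each block column-wise with MatSetValues_MPIBAIJ()
   and assembling, since the preallocation of the result is not known in advance.
*/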
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1684 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1685 {
1686   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1687   Mat            a     = baij->A,b = baij->B;
1688   PetscErrorCode ierr;
1689   PetscInt       s1,s2,s3;
1690 
1691   PetscFunctionBegin;
1692   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1693   if (rr) {
1694     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1695     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1696     /* Overlap communication with computation. */
1697     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1698   }
1699   if (ll) {
1700     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1701     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1702     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1703   }
1704   /* scale the diagonal block */
1705   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1706 
1707   if (rr) {
1708     /* Do a scatter end and then right scale the off-diagonal block */
1709     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1711   }
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1717 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1718 {
1719   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1720   PetscInt      *owners = A->rmap->range;
1721   PetscInt       n      = A->rmap->n;
1722   PetscSF        sf;
1723   PetscInt      *lrows;
1724   PetscSFNode   *rrows;
1725   PetscInt       r, p = 0, len = 0;
1726   PetscErrorCode ierr;
1727 
1728   PetscFunctionBegin;
1729   /* Create SF where leaves are input rows and roots are owned rows */
1730   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1731   for (r = 0; r < n; ++r) lrows[r] = -1;
1732   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1733   for (r = 0; r < N; ++r) {
1734     const PetscInt idx   = rows[r];
1735     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1736     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1737       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1738     }
1739     if (A->nooffproczerorows) {
1740       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1741       lrows[len++] = idx - owners[p];
1742     } else {
1743       rrows[r].rank = p;
1744       rrows[r].index = rows[r] - owners[p];
1745     }
1746   }
1747   if (!A->nooffproczerorows) {
1748     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1749     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1750     /* Collect flags for rows to be zeroed */
1751     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1752     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1753     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1754     /* Compress and put in row numbers */
1755     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1756   }
1757   /* fix right hand side if needed */
1758   if (x && b) {
1759     const PetscScalar *xx;
1760     PetscScalar       *bb;
1761 
1762     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1763     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1764     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1765     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1766     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1767   }
1768 
1769   /* actually zap the local rows */
1770   /*
1771      Zero the required rows. If the "diagonal block" of the matrix
1772      is square and the user wishes to set the diagonal, we use separate
1773      code so that MatSetValues() is not called for each diagonal entry,
1774      which would allocate new memory with many mallocs and slow things down.
1775 
1776   */
1777   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1778   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1779   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1780     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1781   } else if (diag != 0.0) {
1782     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1783     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1784        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1785     for (r = 0; r < len; ++r) {
1786       const PetscInt row = lrows[r] + A->rmap->rstart;
1787       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1788     }
1789     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1791   } else {
1792     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1793   }
1794   ierr = PetscFree(lrows);CHKERRQ(ierr);
1795 
1796   /* only change matrix nonzero state if pattern was allowed to be changed */
1797   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1798     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1799     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
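
/*
   Sketch of the PetscSF pattern used above (informal): each requested global row is a leaf
   that points at the process owning it; reducing the leaf values into the roots with MPI_LOR
   marks exactly the locally owned rows to be zeroed:

      ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);

   followed by compressing the flagged entries of rootdata into local row indices.
   The names sf, remote, leafdata and rootdata above are placeholders.
*/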
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1806 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1807 {
1808   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1809   PetscErrorCode    ierr;
1810   PetscMPIInt       n = A->rmap->n;
1811   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1812   PetscInt          *lrows,*owners = A->rmap->range;
1813   PetscSFNode       *rrows;
1814   PetscSF           sf;
1815   const PetscScalar *xx;
1816   PetscScalar       *bb,*mask;
1817   Vec               xmask,lmask;
1818   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1819   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1820   PetscScalar       *aa;
1821 
1822   PetscFunctionBegin;
1823   /* Create SF where leaves are input rows and roots are owned rows */
1824   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1825   for (r = 0; r < n; ++r) lrows[r] = -1;
1826   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1827   for (r = 0; r < N; ++r) {
1828     const PetscInt idx   = rows[r];
1829     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1830     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1831       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1832     }
1833     rrows[r].rank  = p;
1834     rrows[r].index = rows[r] - owners[p];
1835   }
1836   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1837   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1838   /* Collect flags for rows to be zeroed */
1839   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1840   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1841   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1842   /* Compress and put in row numbers */
1843   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1844   /* zero diagonal part of matrix */
1845   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1846   /* handle off diagonal part of matrix */
1847   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1848   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1849   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1850   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1851   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1852   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1853   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1854   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1855   if (x) {
1856     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1857     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1858     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1859     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1860   }
1861   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1862   /* remove zeroed rows of off diagonal matrix */
1863   for (i = 0; i < len; ++i) {
1864     row   = lrows[i];
1865     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1866     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1867     for (k = 0; k < count; ++k) {
1868       aa[0] = 0.0;
1869       aa   += bs;
1870     }
1871   }
1872   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1873   for (i = 0; i < l->B->rmap->N; ++i) {
1874     row = i/bs;
1875     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1876       for (k = 0; k < bs; ++k) {
1877         col = bs*baij->j[j] + k;
1878         if (PetscAbsScalar(mask[col])) {
1879           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1880           if (b) bb[i] -= aa[0]*xx[col];
1881           aa[0] = 0.0;
1882         }
1883       }
1884     }
1885   }
1886   if (x) {
1887     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1888     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1889   }
1890   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1891   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1892   ierr = PetscFree(lrows);CHKERRQ(ierr);
1893 
1894   /* only change matrix nonzero state if pattern was allowed to be changed */
1895   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1896     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1897     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1904 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1905 {
1906   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1907   PetscErrorCode ierr;
1908 
1909   PetscFunctionBegin;
1910   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatEqual_MPIBAIJ"
1918 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1919 {
1920   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1921   Mat            a,b,c,d;
1922   PetscBool      flg;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   a = matA->A; b = matA->B;
1927   c = matB->A; d = matB->B;
1928 
1929   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1930   if (flg) {
1931     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1932   }
1933   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatCopy_MPIBAIJ"
1939 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1940 {
1941   PetscErrorCode ierr;
1942   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1943   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1944 
1945   PetscFunctionBegin;
1946   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1947   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1948     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1949   } else {
1950     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1951     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1952   }
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1958 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1959 {
1960   PetscErrorCode ierr;
1961 
1962   PetscFunctionBegin;
1963   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1969 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1970 {
1971   PetscErrorCode ierr;
1972   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1973   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1974   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1975 
1976   PetscFunctionBegin;
1977   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1983 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1984 {
1985   PetscErrorCode ierr;
1986   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1987   PetscBLASInt   bnz,one=1;
1988   Mat_SeqBAIJ    *x,*y;
1989 
1990   PetscFunctionBegin;
1991   if (str == SAME_NONZERO_PATTERN) {
1992     PetscScalar alpha = a;
1993     x    = (Mat_SeqBAIJ*)xx->A->data;
1994     y    = (Mat_SeqBAIJ*)yy->A->data;
1995     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1996     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1997     x    = (Mat_SeqBAIJ*)xx->B->data;
1998     y    = (Mat_SeqBAIJ*)yy->B->data;
1999     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2000     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2001     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2002   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2003     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2004   } else {
2005     Mat      B;
2006     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2007     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2008     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2009     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2010     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2011     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2012     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2013     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2014     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2015     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2016     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2017     /* MatAXPY_BasicWithPreallocation() for a BAIJ matrix is much slower than for AIJ, even for bs=1! */
2018     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2019     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2020     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2021     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
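
/*
   Usage sketch (illustrative): Y <- a*X + Y.  With SAME_NONZERO_PATTERN the stored arrays of
   the diagonal and off-diagonal blocks are combined directly with BLAS axpy; otherwise a
   matrix with the union pattern is preallocated and header-replaced into Y:

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/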
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2028 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2029 {
2030   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2031   PetscErrorCode ierr;
2032 
2033   PetscFunctionBegin;
2034   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2035   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2041 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2042 {
2043   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2048   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2054 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2055 {
2056   PetscErrorCode ierr;
2057   IS             iscol_local;
2058   PetscInt       csize;
2059 
2060   PetscFunctionBegin;
2061   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2062   if (call == MAT_REUSE_MATRIX) {
2063     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2064     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2065   } else {
2066     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2067   }
2068   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2069   if (call == MAT_INITIAL_MATRIX) {
2070     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2071     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2072   }
2073   PetscFunctionReturn(0);
2074 }
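
/*
   Illustrative usage sketch (collective; every process must request the same global set of
   columns, here gathered internally with ISAllGather()):

      Mat S;
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

   The gathered column IS is composed on S under the key "ISAllGather" so the
   MAT_REUSE_MATRIX call can find it again.
*/
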
2075 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2078 /*
2079   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
2080   copy, and then the end result obtained by concatenating the local matrices.
2081   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2082   This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
2083 */
2084 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2085 {
2086   PetscErrorCode ierr;
2087   PetscMPIInt    rank,size;
2088   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2089   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2090   Mat            M,Mreuse;
2091   MatScalar      *vwork,*aa;
2092   MPI_Comm       comm;
2093   IS             isrow_new, iscol_new;
2094   PetscBool      idflag,allrows, allcols;
2095   Mat_SeqBAIJ    *aij;
2096 
2097   PetscFunctionBegin;
2098   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101   /* The compression and expansion should be avoided. This does not flag
2102      errors and might change the indices, hence it is buggy */
2103   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2104   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2105 
2106   /* Check for special case: each processor gets entire matrix columns */
2107   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2108   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2109   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2110   else allcols = PETSC_FALSE;
2111 
2112   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2113   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2114   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2115   else allrows = PETSC_FALSE;
2116 
2117   if (call ==  MAT_REUSE_MATRIX) {
2118     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2119     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2120     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2121   } else {
2122     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2123   }
2124   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2125   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2126   /*
2127       m - number of local rows
2128       n - number of columns (same on all processors)
2129       rstart - first row in new global matrix generated
2130   */
2131   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2132   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2133   m    = m/bs;
2134   n    = n/bs;
2135 
2136   if (call == MAT_INITIAL_MATRIX) {
2137     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2138     ii  = aij->i;
2139     jj  = aij->j;
2140 
2141     /*
2142         Determine the number of non-zeros in the diagonal and off-diagonal
2143         portions of the matrix in order to do correct preallocation
2144     */
2145 
2146     /* first get start and end of "diagonal" columns */
2147     if (csize == PETSC_DECIDE) {
2148       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2149       if (mglobal == n*bs) { /* square matrix */
2150         nlocal = m;
2151       } else {
2152         nlocal = n/size + ((n % size) > rank);
2153       }
2154     } else {
2155       nlocal = csize/bs;
2156     }
2157     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2158     rstart = rend - nlocal;
2159     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2160 
2161     /* next, compute all the lengths */
2162     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2163     for (i=0; i<m; i++) {
2164       jend = ii[i+1] - ii[i];
2165       olen = 0;
2166       dlen = 0;
2167       for (j=0; j<jend; j++) {
2168         if (*jj < rstart || *jj >= rend) olen++;
2169         else dlen++;
2170         jj++;
2171       }
2172       olens[i] = olen;
2173       dlens[i] = dlen;
2174     }
2175     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2176     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2177     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2178     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2179     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2180     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2181   } else {
2182     PetscInt ml,nl;
2183 
2184     M    = *newmat;
2185     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2186     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2187     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2188     /*
2189          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2190        rather than the slower MatSetValuesBlocked().
2191     */
2192     M->was_assembled = PETSC_TRUE;
2193     M->assembled     = PETSC_FALSE;
2194   }
2195   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2196   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2197   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2198   ii   = aij->i;
2199   jj   = aij->j;
2200   aa   = aij->a;
2201   for (i=0; i<m; i++) {
2202     row   = rstart/bs + i;
2203     nz    = ii[i+1] - ii[i];
2204     cwork = jj;     jj += nz;
2205     vwork = aa;     aa += nz*bs*bs;
2206     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2207   }
2208 
2209   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2210   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   *newmat = M;
2212 
2213   /* save submatrix used in processor for next request */
2214   if (call ==  MAT_INITIAL_MATRIX) {
2215     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2216     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatPermute_MPIBAIJ"
2223 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2224 {
2225   MPI_Comm       comm,pcomm;
2226   PetscInt       clocal_size,nrows;
2227   const PetscInt *rows;
2228   PetscMPIInt    size;
2229   IS             crowp,lcolp;
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2234   /* make a collective version of 'rowp' */
2235   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2236   if (pcomm==comm) {
2237     crowp = rowp;
2238   } else {
2239     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2240     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2241     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2242     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2243   }
2244   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2245   /* make a local version of 'colp' */
2246   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2247   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2248   if (size==1) {
2249     lcolp = colp;
2250   } else {
2251     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2252   }
2253   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2254   /* now we just get the submatrix */
2255   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2256   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2257   /* clean up */
2258   if (pcomm!=comm) {
2259     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2260   }
2261   if (size>1) {
2262     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2263   }
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2269 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2270 {
2271   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2272   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2273 
2274   PetscFunctionBegin;
2275   if (nghosts) *nghosts = B->nbs;
2276   if (ghosts) *ghosts = baij->garray;
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2282 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2283 {
2284   Mat            B;
2285   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2286   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2287   Mat_SeqAIJ     *b;
2288   PetscErrorCode ierr;
2289   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2290   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2291   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2292 
2293   PetscFunctionBegin;
2294   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2295   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2296 
2297   /* ----------------------------------------------------------------
2298      Tell every processor the number of nonzeros per row
2299   */
2300   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2301   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2302     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2303   }
2304   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2305   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2306   displs    = recvcounts + size;
2307   for (i=0; i<size; i++) {
2308     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2309     displs[i]     = A->rmap->range[i]/bs;
2310   }
2311 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2312   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2313 #else
2314   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2315 #endif
2316   /* ---------------------------------------------------------------
2317      Create the sequential matrix of the same type as the local block diagonal
2318   */
2319   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2320   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2321   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2322   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2323   b    = (Mat_SeqAIJ*)B->data;
2324 
2325   /*--------------------------------------------------------------------
2326     Copy my part of matrix column indices over
2327   */
2328   sendcount  = ad->nz + bd->nz;
2329   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2330   a_jsendbuf = ad->j;
2331   b_jsendbuf = bd->j;
2332   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2333   cnt        = 0;
2334   for (i=0; i<n; i++) {
2335 
2336     /* put in lower diagonal portion */
2337     m = bd->i[i+1] - bd->i[i];
2338     while (m > 0) {
2339       /* is it above diagonal (in bd (compressed) numbering) */
2340       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2341       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2342       m--;
2343     }
2344 
2345     /* put in diagonal portion */
2346     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2347       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2348     }
2349 
2350     /* put in upper diagonal portion */
2351     while (m-- > 0) {
2352       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2353     }
2354   }
2355   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2356 
2357   /*--------------------------------------------------------------------
2358     Gather all column indices to all processors
2359   */
2360   for (i=0; i<size; i++) {
2361     recvcounts[i] = 0;
2362     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2363       recvcounts[i] += lens[j];
2364     }
2365   }
2366   displs[0] = 0;
2367   for (i=1; i<size; i++) {
2368     displs[i] = displs[i-1] + recvcounts[i-1];
2369   }
2370 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2371   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2372 #else
2373   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2374 #endif
2375   /*--------------------------------------------------------------------
2376     Assemble the matrix into usable form (note the numerical values are not yet set)
2377   */
2378   /* set the b->ilen (length of each row) values */
2379   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2380   /* set the b->i indices */
2381   b->i[0] = 0;
2382   for (i=1; i<=A->rmap->N/bs; i++) {
2383     b->i[i] = b->i[i-1] + lens[i-1];
2384   }
2385   ierr = PetscFree(lens);CHKERRQ(ierr);
2386   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2389 
2390   if (A->symmetric) {
2391     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2392   } else if (A->hermitian) {
2393     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2394   } else if (A->structurally_symmetric) {
2395     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2396   }
2397   *newmat = B;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatSOR_MPIBAIJ"
2403 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2404 {
2405   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2406   PetscErrorCode ierr;
2407   Vec            bb1 = 0;
2408 
2409   PetscFunctionBegin;
2410   if (flag == SOR_APPLY_UPPER) {
2411     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2412     PetscFunctionReturn(0);
2413   }
2414 
2415   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2416     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2417   }
2418 
2419   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2420     if (flag & SOR_ZERO_INITIAL_GUESS) {
2421       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2422       its--;
2423     }
2424 
2425     while (its--) {
2426       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2427       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2428 
2429       /* update rhs: bb1 = bb - B*x */
2430       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2431       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2432 
2433       /* local sweep */
2434       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2435     }
2436   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2437     if (flag & SOR_ZERO_INITIAL_GUESS) {
2438       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2439       its--;
2440     }
2441     while (its--) {
2442       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2443       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2444 
2445       /* update rhs: bb1 = bb - B*x */
2446       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2447       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2448 
2449       /* local sweep */
2450       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2451     }
2452   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2453     if (flag & SOR_ZERO_INITIAL_GUESS) {
2454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2455       its--;
2456     }
2457     while (its--) {
2458       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2459       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2460 
2461       /* update rhs: bb1 = bb - B*x */
2462       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2463       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2464 
2465       /* local sweep */
2466       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2467     }
2468   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
2469 
2470   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2471   PetscFunctionReturn(0);
2472 }
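
/*
   Sketch of one outer iteration above (informal): with the matrix split as A = A_d + A_o
   into the local diagonal block and the off-process coupling,

      scatter x -> lvec                  (ghost values of x)
      bb1 = bb - A_o * lvec              (update the right-hand side)
      x   = SOR(A_d, bb1)                (local sweep in the requested direction)

   Only the local block is relaxed; off-process couplings enter through the modified
   right-hand side, which is why only the SOR_LOCAL_* flags are supported.
*/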
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2476 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2480   PetscInt       N,i,*garray = aij->garray;
2481   PetscInt       ib,jb,bs = A->rmap->bs;
2482   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2483   MatScalar      *a_val = a_aij->a;
2484   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2485   MatScalar      *b_val = b_aij->a;
2486   PetscReal      *work;
2487 
2488   PetscFunctionBegin;
2489   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2490   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2491   if (type == NORM_2) {
2492     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2493       for (jb=0; jb<bs; jb++) {
2494         for (ib=0; ib<bs; ib++) {
2495           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2496           a_val++;
2497         }
2498       }
2499     }
2500     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2501       for (jb=0; jb<bs; jb++) {
2502         for (ib=0; ib<bs; ib++) {
2503           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2504           b_val++;
2505         }
2506       }
2507     }
2508   } else if (type == NORM_1) {
2509     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2510       for (jb=0; jb<bs; jb++) {
2511         for (ib=0; ib<bs; ib++) {
2512           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2513           a_val++;
2514         }
2515       }
2516     }
2517     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2518       for (jb=0; jb<bs; jb++) {
2519        for (ib=0; ib<bs; ib++) {
2520           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2521           b_val++;
2522         }
2523       }
2524     }
2525   } else if (type == NORM_INFINITY) {
2526     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2527       for (jb=0; jb<bs; jb++) {
2528         for (ib=0; ib<bs; ib++) {
2529           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2530           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2531           a_val++;
2532         }
2533       }
2534     }
2535     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2536       for (jb=0; jb<bs; jb++) {
2537         for (ib=0; ib<bs; ib++) {
2538           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2539           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2540           b_val++;
2541         }
2542       }
2543     }
2544   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2545   if (type == NORM_INFINITY) {
2546     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2547   } else {
2548     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2549   }
2550   ierr = PetscFree(work);CHKERRQ(ierr);
2551   if (type == NORM_2) {
2552     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2559 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2560 {
2561   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2562   PetscErrorCode ierr;
2563 
2564   PetscFunctionBegin;
2565   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 
2570 /* -------------------------------------------------------------------*/
2571 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2572                                        MatGetRow_MPIBAIJ,
2573                                        MatRestoreRow_MPIBAIJ,
2574                                        MatMult_MPIBAIJ,
2575                                 /* 4*/ MatMultAdd_MPIBAIJ,
2576                                        MatMultTranspose_MPIBAIJ,
2577                                        MatMultTransposeAdd_MPIBAIJ,
2578                                        0,
2579                                        0,
2580                                        0,
2581                                 /*10*/ 0,
2582                                        0,
2583                                        0,
2584                                        MatSOR_MPIBAIJ,
2585                                        MatTranspose_MPIBAIJ,
2586                                 /*15*/ MatGetInfo_MPIBAIJ,
2587                                        MatEqual_MPIBAIJ,
2588                                        MatGetDiagonal_MPIBAIJ,
2589                                        MatDiagonalScale_MPIBAIJ,
2590                                        MatNorm_MPIBAIJ,
2591                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2592                                        MatAssemblyEnd_MPIBAIJ,
2593                                        MatSetOption_MPIBAIJ,
2594                                        MatZeroEntries_MPIBAIJ,
2595                                 /*24*/ MatZeroRows_MPIBAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                 /*29*/ MatSetUp_MPIBAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*34*/ MatDuplicate_MPIBAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*39*/ MatAXPY_MPIBAIJ,
2611                                        MatGetSubMatrices_MPIBAIJ,
2612                                        MatIncreaseOverlap_MPIBAIJ,
2613                                        MatGetValues_MPIBAIJ,
2614                                        MatCopy_MPIBAIJ,
2615                                 /*44*/ 0,
2616                                        MatScale_MPIBAIJ,
2617                                        0,
2618                                        0,
2619                                        MatZeroRowsColumns_MPIBAIJ,
2620                                 /*49*/ 0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2626                                        0,
2627                                        MatSetUnfactored_MPIBAIJ,
2628                                        MatPermute_MPIBAIJ,
2629                                        MatSetValuesBlocked_MPIBAIJ,
2630                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2631                                        MatDestroy_MPIBAIJ,
2632                                        MatView_MPIBAIJ,
2633                                        0,
2634                                        0,
2635                                 /*64*/ 0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*74*/ 0,
2646                                        MatFDColoringApply_BAIJ,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*79*/ 0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        MatLoad_MPIBAIJ,
2655                                 /*84*/ 0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*89*/ 0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*94*/ 0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*99*/ 0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*104*/0,
2676                                        MatRealPart_MPIBAIJ,
2677                                        MatImaginaryPart_MPIBAIJ,
2678                                        0,
2679                                        0,
2680                                 /*109*/0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2686                                        0,
2687                                        MatGetGhosts_MPIBAIJ,
2688                                        0,
2689                                        0,
2690                                 /*119*/0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        MatGetMultiProcBlock_MPIBAIJ,
2695                                 /*124*/0,
2696                                        MatGetColumnNorms_MPIBAIJ,
2697                                        MatInvertBlockDiagonal_MPIBAIJ,
2698                                        0,
2699                                        0,
2700                                /*129*/ 0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                /*134*/ 0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                /*139*/ 0,
2711                                        0,
2712                                        0,
2713                                        MatFDColoringSetUp_MPIXAIJ,
2714                                        0,
2715                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2716 };
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2720 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2721 {
2722   PetscFunctionBegin;
2723   *a = ((Mat_MPIBAIJ*)A->data)->A;
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2731 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2732 {
2733   PetscInt       m,rstart,cstart,cend;
2734   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2735   const PetscInt *JJ    =0;
2736   PetscScalar    *values=0;
2737   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2742   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2743   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2744   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2745   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2746   m      = B->rmap->n/bs;
2747   rstart = B->rmap->rstart/bs;
2748   cstart = B->cmap->rstart/bs;
2749   cend   = B->cmap->rend/bs;
2750 
2751   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2752   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2753   for (i=0; i<m; i++) {
2754     nz = ii[i+1] - ii[i];
2755     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2756     nz_max = PetscMax(nz_max,nz);
2757     JJ     = jj + ii[i];
2758     for (j=0; j<nz; j++) {
2759       if (*JJ >= cstart) break;
2760       JJ++;
2761     }
2762     d = 0;
2763     for (; j<nz; j++) {
2764       if (*JJ++ >= cend) break;
2765       d++;
2766     }
2767     d_nnz[i] = d;
2768     o_nnz[i] = nz - d;
2769   }
2770   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2771   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2772 
2773   values = (PetscScalar*)V;
2774   if (!values) {
2775     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2776     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2777   }
2778   for (i=0; i<m; i++) {
2779     PetscInt          row    = i + rstart;
2780     PetscInt          ncols  = ii[i+1] - ii[i];
2781     const PetscInt    *icols = jj + ii[i];
2782     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2783       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2784       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2785     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2786       PetscInt j;
2787       for (j=0; j<ncols; j++) {
2788         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2789         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2790       }
2791     }
2792   }
2793 
2794   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2795   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2797   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2803 /*@C
2804    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2805    (block compressed row).
2806 
2807    Collective on MPI_Comm
2808 
2809    Input Parameters:
2810 +  B - the matrix
2811 .  bs - the block size
2812 .  i - the indices into j for the start of each local block row (starts with zero)
2813 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2814 -  v - optional values in the matrix
2815 
2816    Level: developer
2817 
2818    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2819    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2820    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2821    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2822    block and the second index is over columns within a block.
2823 
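   For illustration, a minimal sketch (not from this source): following the implementation above, i and j are
   given per block row and hold global block-column indices.  Assuming this process owns global block rows 0
   and 1 and bs = 2 (the arrays and ierr below are illustrative), a block-diagonal preallocation with the
   values supplied later is
.vb
   PetscInt ii[] = {0,1,2};                        /* block row r uses entries ii[r]..ii[r+1]-1 of jj */
   PetscInt jj[] = {0,1};                          /* global block-column index of each block         */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,NULL);CHKERRQ(ierr);
.ve
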
2824 .keywords: matrix, aij, compressed row, sparse, parallel
2825 
2826 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2827 @*/
2828 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2829 {
2830   PetscErrorCode ierr;
2831 
2832   PetscFunctionBegin;
2833   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2834   PetscValidType(B,1);
2835   PetscValidLogicalCollectiveInt(B,bs,2);
2836   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2842 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2843 {
2844   Mat_MPIBAIJ    *b;
2845   PetscErrorCode ierr;
2846   PetscInt       i;
2847 
2848   PetscFunctionBegin;
2849   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2850   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2851   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2852   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2853 
2854   if (d_nnz) {
2855     for (i=0; i<B->rmap->n/bs; i++) {
2856       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2857     }
2858   }
2859   if (o_nnz) {
2860     for (i=0; i<B->rmap->n/bs; i++) {
2861       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2862     }
2863   }
2864 
2865   b      = (Mat_MPIBAIJ*)B->data;
2866   b->bs2 = bs*bs;
2867   b->mbs = B->rmap->n/bs;
2868   b->nbs = B->cmap->n/bs;
2869   b->Mbs = B->rmap->N/bs;
2870   b->Nbs = B->cmap->N/bs;
2871 
2872   for (i=0; i<=b->size; i++) {
2873     b->rangebs[i] = B->rmap->range[i]/bs;
2874   }
2875   b->rstartbs = B->rmap->rstart/bs;
2876   b->rendbs   = B->rmap->rend/bs;
2877   b->cstartbs = B->cmap->rstart/bs;
2878   b->cendbs   = B->cmap->rend/bs;
2879 
2880   if (!B->preallocated) {
2881     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2882     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2883     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2884     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2885     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2886     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2887     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2888     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2889     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2890   }
2891 
2892   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2893   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2894   B->preallocated = PETSC_TRUE;
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2899 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2900 
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2903 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2904 {
2905   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2906   PetscErrorCode ierr;
2907   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2908   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2909   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2910 
2911   PetscFunctionBegin;
2912   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2913   ii[0] = 0;
2914   for (i=0; i<M; i++) {
2915     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2916     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2917     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2918     /* remove one from the count if the row contains a diagonal entry */
2919     for (j=id[i]; j<id[i+1]; j++) {
2920       if (jd[j] == i) {ii[i+1]--;break;}
2921     }
2922   }
2923   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2924   cnt  = 0;
2925   for (i=0; i<M; i++) {
2926     for (j=io[i]; j<io[i+1]; j++) {
2927       if (garray[jo[j]] > rstart) break;
2928       jj[cnt++] = garray[jo[j]];
2929     }
2930     for (k=id[i]; k<id[i+1]; k++) {
2931       if (jd[k] != i) {
2932         jj[cnt++] = rstart + jd[k];
2933       }
2934     }
2935     for (; j<io[i+1]; j++) {
2936       jj[cnt++] = garray[jo[j]];
2937     }
2938   }
2939   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2944 
2945 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2949 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2950 {
2951   PetscErrorCode ierr;
2952   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2953   Mat            B;
2954   Mat_MPIAIJ     *b;
2955 
2956   PetscFunctionBegin;
2957   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2958 
2959   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2960   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2961   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2962   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2963   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2964   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2965   b    = (Mat_MPIAIJ*) B->data;
2966 
2967   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2968   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2969   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2970   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2971   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   if (reuse == MAT_REUSE_MATRIX) {
2977     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2978   } else {
2979    *newmat = B;
2980   }
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 /*MC
2985    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2986 
2987    Options Database Keys:
2988 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2989 . -mat_block_size <bs> - set the blocksize used to store the matrix
2990 - -mat_use_hash_table <fact> - use a hash table (with the given factor) to save memory during matrix assembly
2991 
2992   Level: beginner
2993 
2994 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation()
2995 M*/
2996 
2997 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatCreate_MPIBAIJ"
3001 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3002 {
3003   Mat_MPIBAIJ    *b;
3004   PetscErrorCode ierr;
3005   PetscBool      flg = PETSC_FALSE;
3006 
3007   PetscFunctionBegin;
3008   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3009   B->data = (void*)b;
3010 
3011   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3012   B->assembled = PETSC_FALSE;
3013 
3014   B->insertmode = NOT_SET_VALUES;
3015   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3016   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3017 
3018   /* build local table of row and column ownerships */
3019   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3020 
3021   /* build cache for off array entries formed */
3022   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3023 
3024   b->donotstash  = PETSC_FALSE;
3025   b->colmap      = NULL;
3026   b->garray      = NULL;
3027   b->roworiented = PETSC_TRUE;
3028 
3029   /* stuff used in block assembly */
3030   b->barray = 0;
3031 
3032   /* stuff used for matrix vector multiply */
3033   b->lvec  = 0;
3034   b->Mvctx = 0;
3035 
3036   /* stuff for MatGetRow() */
3037   b->rowindices   = 0;
3038   b->rowvalues    = 0;
3039   b->getrowactive = PETSC_FALSE;
3040 
3041   /* hash table stuff */
3042   b->ht           = 0;
3043   b->hd           = 0;
3044   b->ht_size      = 0;
3045   b->ht_flag      = PETSC_FALSE;
3046   b->ht_fact      = 0;
3047   b->ht_total_ct  = 0;
3048   b->ht_insert_ct = 0;
3049 
3050   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3051   b->ijonly = PETSC_FALSE;
3052 
3053 
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3065   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3066 
3067   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3068   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
3069   if (flg) {
3070     PetscReal fact = 1.39;
3071     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3072     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3073     if (fact <= 1.0) fact = 1.39;
3074     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3075     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3076   }
3077   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 /*MC
3082    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3083 
3084    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3085    and MATMPIBAIJ otherwise.
3086 
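   For illustration, a minimal sketch (not from this source; A and ierr are illustrative) of selecting the
   type at runtime:
.vb
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);        /* resolves to MATSEQBAIJ or MATMPIBAIJ from the communicator size */
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);         /* -mat_type baij may change the type here                         */
.ve
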
3087    Options Database Keys:
3088 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3089 
3090   Level: beginner
3091 
3092 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3093 M*/
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3097 /*@C
3098    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3099    (block compressed row).  For good matrix assembly performance
3100    the user should preallocate the matrix storage by setting the parameters
3101    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3102    performance can be increased by more than a factor of 50.
3103 
3104    Collective on Mat
3105 
3106    Input Parameters:
3107 +  B - the matrix
3108 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
3109           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3110 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3111            submatrix  (same for all local rows)
3112 .  d_nnz - array containing the number of block nonzeros in the various block rows
3113            in the diagonal portion of the local submatrix (possibly different for each block
3114            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3115            set it even if it is zero.
3116 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3117            submatrix (same for all local rows).
3118 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3119            off-diagonal portion of the local submatrix (possibly different for
3120            each block row) or NULL.
3121 
3122    If the *_nnz parameter is given then the *_nz parameter is ignored
3123 
3124    Options Database Keys:
3125 +   -mat_block_size - size of the blocks to use
3126 -   -mat_use_hash_table <fact> - use a hash table (with the given factor) to save memory during matrix assembly
3127 
3128    Notes:
3129    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3130    then it must be used on all processors that share the object for that argument.
3131 
3132    Storage Information:
3133    For a square global matrix we define each processor's diagonal portion
3134    to be its local rows and the corresponding columns (a square submatrix);
3135    each processor's off-diagonal portion encompasses the remainder of the
3136    local matrix (a rectangular submatrix).
3137 
3138    The user can specify preallocated storage for the diagonal part of
3139    the local submatrix with either d_nz or d_nnz (not both).  Set
3140    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3141    memory allocation.  Likewise, specify preallocated storage for the
3142    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3143 
3144    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3145    the figure below we depict these three local rows and all columns (0-11).
3146 
3147 .vb
3148            0 1 2 3 4 5 6 7 8 9 10 11
3149           --------------------------
3150    row 3  |o o o d d d o o o o  o  o
3151    row 4  |o o o d d d o o o o  o  o
3152    row 5  |o o o d d d o o o o  o  o
3153           --------------------------
3154 .ve
3155 
3156    Thus, any entries in the d locations are stored in the d (diagonal)
3157    submatrix, and any entries in the o locations are stored in the
3158    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3159    stored simply in the MATSEQBAIJ format for compressed row storage.
3160 
3161    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3162    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3163    In general, for PDE problems in which most nonzeros are near the diagonal,
3164    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3165    or you will get TERRIBLE performance; see the users' manual chapter on
3166    matrices.
3167 
3168    You can call MatGetInfo() to get information on how effective the preallocation was;
3169    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3170    You can also run with the option -info and look for messages with the string
3171    malloc in them to see if additional memory allocation was needed.
3172 
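   For illustration, a minimal sketch (not from this source; B is the matrix above, while M, N, bs, ierr and
   the nonzero counts are hypothetical):
.vb
   ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(B,bs,3,NULL,1,NULL);CHKERRQ(ierr); /* 3 diagonal and 1 off-diagonal block nonzeros per block row */
.ve
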
3173    Level: intermediate
3174 
3175 .keywords: matrix, block, aij, compressed row, sparse, parallel
3176 
3177 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3178 @*/
3179 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3180 {
3181   PetscErrorCode ierr;
3182 
3183   PetscFunctionBegin;
3184   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3185   PetscValidType(B,1);
3186   PetscValidLogicalCollectiveInt(B,bs,2);
3187   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 #undef __FUNCT__
3192 #define __FUNCT__ "MatCreateBAIJ"
3193 /*@C
3194    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3195    (block compressed row).  For good matrix assembly performance
3196    the user should preallocate the matrix storage by setting the parameters
3197    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3198    performance can be increased by more than a factor of 50.
3199 
3200    Collective on MPI_Comm
3201 
3202    Input Parameters:
3203 +  comm - MPI communicator
3204 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
3205           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3206 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3207            This value should be the same as the local size used in creating the
3208            y vector for the matrix-vector product y = Ax.
3209 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3210            This value should be the same as the local size used in creating the
3211            x vector for the matrix-vector product y = Ax.
3212 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3213 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3214 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3215            submatrix  (same for all local rows)
3216 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3217            in the diagonal portion of the local submatrix (possibly different for each block
3218            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3219            and set it even if it is zero.
3220 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3221            submatrix (same for all local rows).
3222 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3223            off-diagonal portion of the local submatrix (possibly different for
3224            each block row) or NULL.
3225 
3226    Output Parameter:
3227 .  A - the matrix
3228 
3229    Options Database Keys:
3230 +   -mat_block_size - size of the blocks to use
3231 -   -mat_use_hash_table <fact> - use a hash table (with the given factor) to save memory during matrix assembly
3232 
3233    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3234    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3235    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3236 
3237    Notes:
3238    If the *_nnz parameter is given then the *_nz parameter is ignored
3239 
3240    A nonzero block is any block that has 1 or more nonzeros in it
3241 
3242    The user MUST specify either the local or global matrix dimensions
3243    (possibly both).
3244 
3245    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3246    then it must be used on all processors that share the object for that argument.
3247 
3248    Storage Information:
3249    For a square global matrix we define each processor's diagonal portion
3250    to be its local rows and the corresponding columns (a square submatrix);
3251    each processor's off-diagonal portion encompasses the remainder of the
3252    local matrix (a rectangular submatrix).
3253 
3254    The user can specify preallocated storage for the diagonal part of
3255    the local submatrix with either d_nz or d_nnz (not both).  Set
3256    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3257    memory allocation.  Likewise, specify preallocated storage for the
3258    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3259 
3260    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3261    the figure below we depict these three local rows and all columns (0-11).
3262 
3263 .vb
3264            0 1 2 3 4 5 6 7 8 9 10 11
3265           --------------------------
3266    row 3  |o o o d d d o o o o  o  o
3267    row 4  |o o o d d d o o o o  o  o
3268    row 5  |o o o d d d o o o o  o  o
3269           --------------------------
3270 .ve
3271 
3272    Thus, any entries in the d locations are stored in the d (diagonal)
3273    submatrix, and any entries in the o locations are stored in the
3274    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3275    stored simply in the MATSEQBAIJ format for compressed row storage.
3276 
3277    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3278    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3279    In general, for PDE problems in which most nonzeros are near the diagonal,
3280    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3281    or you will get TERRIBLE performance; see the users' manual chapter on
3282    matrices.
3283 
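   For illustration, a minimal sketch (not from this source; bs, M, N, ierr and the nonzero counts are
   hypothetical) that lets PETSc choose the local sizes:
.vb
   ierr = MatCreateBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve
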
3284    Level: intermediate
3285 
3286 .keywords: matrix, block, aij, compressed row, sparse, parallel
3287 
3288 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3289 @*/
3290 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3291 {
3292   PetscErrorCode ierr;
3293   PetscMPIInt    size;
3294 
3295   PetscFunctionBegin;
3296   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3297   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3298   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3299   if (size > 1) {
3300     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3301     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3302   } else {
3303     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3304     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3305   }
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3311 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3312 {
3313   Mat            mat;
3314   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3315   PetscErrorCode ierr;
3316   PetscInt       len=0;
3317 
3318   PetscFunctionBegin;
3319   *newmat = 0;
3320   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3321   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3322   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3323   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3324 
3325   mat->factortype   = matin->factortype;
3326   mat->preallocated = PETSC_TRUE;
3327   mat->assembled    = PETSC_TRUE;
3328   mat->insertmode   = NOT_SET_VALUES;
3329 
3330   a             = (Mat_MPIBAIJ*)mat->data;
3331   mat->rmap->bs = matin->rmap->bs;
3332   a->bs2        = oldmat->bs2;
3333   a->mbs        = oldmat->mbs;
3334   a->nbs        = oldmat->nbs;
3335   a->Mbs        = oldmat->Mbs;
3336   a->Nbs        = oldmat->Nbs;
3337 
3338   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3339   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3340 
3341   a->size         = oldmat->size;
3342   a->rank         = oldmat->rank;
3343   a->donotstash   = oldmat->donotstash;
3344   a->roworiented  = oldmat->roworiented;
3345   a->rowindices   = 0;
3346   a->rowvalues    = 0;
3347   a->getrowactive = PETSC_FALSE;
3348   a->barray       = 0;
3349   a->rstartbs     = oldmat->rstartbs;
3350   a->rendbs       = oldmat->rendbs;
3351   a->cstartbs     = oldmat->cstartbs;
3352   a->cendbs       = oldmat->cendbs;
3353 
3354   /* hash table stuff */
3355   a->ht           = 0;
3356   a->hd           = 0;
3357   a->ht_size      = 0;
3358   a->ht_flag      = oldmat->ht_flag;
3359   a->ht_fact      = oldmat->ht_fact;
3360   a->ht_total_ct  = 0;
3361   a->ht_insert_ct = 0;
3362 
3363   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3364   if (oldmat->colmap) {
3365 #if defined(PETSC_USE_CTABLE)
3366     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3367 #else
3368     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3369     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3370     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3371 #endif
3372   } else a->colmap = 0;
3373 
3374   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3375     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3376     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3377     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3378   } else a->garray = 0;
3379 
3380   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3381   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3382   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3383   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3384   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3385 
3386   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3387   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3388   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3389   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3390   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3391   *newmat = mat;
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 #undef __FUNCT__
3396 #define __FUNCT__ "MatLoad_MPIBAIJ"
3397 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3398 {
3399   PetscErrorCode ierr;
3400   int            fd;
3401   PetscInt       i,nz,j,rstart,rend;
3402   PetscScalar    *vals,*buf;
3403   MPI_Comm       comm;
3404   MPI_Status     status;
3405   PetscMPIInt    rank,size,maxnz;
3406   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3407   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3408   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3409   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3410   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3411   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3412 
3413   PetscFunctionBegin;
3414   /* force binary viewer to load .info file if it has not yet done so */
3415   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3416   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3417   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3418   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3419   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3420   if (bs < 0) bs = 1;
3421 
3422   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3423   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3424   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3425   if (!rank) {
3426     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3427     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3428   }
3429 
3430   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3431 
3432   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3433   M    = header[1]; N = header[2];
3434 
3435   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3436   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3437   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3438 
3439   /* If global sizes are set, check if they are consistent with that given in the file */
3440   if (sizesset) {
3441     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3442   }
3443   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3444   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3445 
3446   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3447 
3448   /*
3449      This code adds extra rows to make sure the number of rows is
3450      divisible by the blocksize
3451   */
3452   Mbs        = M/bs;
3453   extra_rows = bs - M + bs*Mbs;
3454   if (extra_rows == bs) extra_rows = 0;
3455   else                  Mbs++;
3456   if (extra_rows && !rank) {
3457     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3458   }
3459 
3460   /* determine ownership of all rows */
3461   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3462     mbs = Mbs/size + ((Mbs % size) > rank);
3463     m   = mbs*bs;
3464   } else { /* User set */
3465     m   = newmat->rmap->n;
3466     mbs = m/bs;
3467   }
3468   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3469   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3470 
3471   /* process 0 needs enough room for process with most rows */
3472   if (!rank) {
3473     mmax = rowners[1];
3474     for (i=2; i<=size; i++) {
3475       mmax = PetscMax(mmax,rowners[i]);
3476     }
3477     mmax*=bs;
3478   } else mmax = -1;             /* unused, but compiler warns anyway */
3479 
3480   rowners[0] = 0;
3481   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3482   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3483   rstart = rowners[rank];
3484   rend   = rowners[rank+1];
3485 
3486   /* distribute row lengths to all processors */
3487   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3488   if (!rank) {
3489     mend = m;
3490     if (size == 1) mend = mend - extra_rows;
3491     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3492     for (j=mend; j<m; j++) locrowlens[j] = 1;
3493     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3494     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3495     for (j=0; j<m; j++) {
3496       procsnz[0] += locrowlens[j];
3497     }
3498     for (i=1; i<size; i++) {
3499       mend = browners[i+1] - browners[i];
3500       if (i == size-1) mend = mend - extra_rows;
3501       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3502       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3503       /* calculate the number of nonzeros on each processor */
3504       for (j=0; j<browners[i+1]-browners[i]; j++) {
3505         procsnz[i] += rowlengths[j];
3506       }
3507       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3510   } else {
3511     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3512   }
3513 
3514   if (!rank) {
3515     /* determine max buffer needed and allocate it */
3516     maxnz = procsnz[0];
3517     for (i=1; i<size; i++) {
3518       maxnz = PetscMax(maxnz,procsnz[i]);
3519     }
3520     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3521 
3522     /* read in my part of the matrix column indices  */
3523     nz     = procsnz[0];
3524     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3525     mycols = ibuf;
3526     if (size == 1) nz -= extra_rows;
3527     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3528     if (size == 1) {
3529       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3530     }
3531 
3532     /* read in every other process's part (except the last) and ship it off */
3533     for (i=1; i<size-1; i++) {
3534       nz   = procsnz[i];
3535       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3536       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3537     }
3538     /* read in the stuff for the last proc */
3539     if (size != 1) {
3540       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3541       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3542       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3543       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3544     }
3545     ierr = PetscFree(cols);CHKERRQ(ierr);
3546   } else {
3547     /* determine buffer space needed for message */
3548     nz = 0;
3549     for (i=0; i<m; i++) {
3550       nz += locrowlens[i];
3551     }
3552     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3553     mycols = ibuf;
3554     /* receive message of column indices*/
3555     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3556     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3557     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3558   }
3559 
3560   /* loop over local rows, determining number of off diagonal entries */
3561   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3562   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3563   rowcount = 0; nzcount = 0;
3564   for (i=0; i<mbs; i++) {
3565     dcount  = 0;
3566     odcount = 0;
3567     for (j=0; j<bs; j++) {
3568       kmax = locrowlens[rowcount];
3569       for (k=0; k<kmax; k++) {
3570         tmp = mycols[nzcount++]/bs;
3571         if (!mask[tmp]) {
3572           mask[tmp] = 1;
3573           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3574           else masked1[dcount++] = tmp;
3575         }
3576       }
3577       rowcount++;
3578     }
3579 
3580     dlens[i]  = dcount;
3581     odlens[i] = odcount;
3582 
3583     /* zero out the mask elements we set */
3584     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3585     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3586   }
3587 
3588 
3589   if (!sizesset) {
3590     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3591   }
3592   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3593 
3594   if (!rank) {
3595     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3596     /* read in my part of the matrix numerical values  */
3597     nz     = procsnz[0];
3598     vals   = buf;
3599     mycols = ibuf;
3600     if (size == 1) nz -= extra_rows;
3601     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3602     if (size == 1) {
3603       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3604     }
3605 
3606     /* insert into matrix */
3607     jj = rstart*bs;
3608     for (i=0; i<m; i++) {
3609       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3610       mycols += locrowlens[i];
3611       vals   += locrowlens[i];
3612       jj++;
3613     }
3614     /* read in the other processors' parts (except the last one) and ship them out */
3615     for (i=1; i<size-1; i++) {
3616       nz   = procsnz[i];
3617       vals = buf;
3618       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3619       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3620     }
3621     /* the last proc */
3622     if (size != 1) {
3623       nz   = procsnz[i] - extra_rows;
3624       vals = buf;
3625       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3626       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3627       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3628     }
3629     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3630   } else {
3631     /* receive numeric values */
3632     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3633 
3634     /* receive message of values*/
3635     vals   = buf;
3636     mycols = ibuf;
3637     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3638 
3639     /* insert into matrix */
3640     jj = rstart*bs;
3641     for (i=0; i<m; i++) {
3642       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3643       mycols += locrowlens[i];
3644       vals   += locrowlens[i];
3645       jj++;
3646     }
3647   }
3648   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3649   ierr = PetscFree(buf);CHKERRQ(ierr);
3650   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3651   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3652   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3653   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3654   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 #undef __FUNCT__
3660 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3661 /*@
3662    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3663 
3664    Input Parameters:
3665 +  mat  - the matrix
3666 -  fact - factor
3667 
3668    Not Collective, each process can use a different factor
3669 
3670    Level: advanced
3671 
3672   Notes:
3673    This can also be set by the command line option: -mat_use_hash_table <fact>
3674 
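   For illustration, a minimal sketch (not from this source; A and ierr are illustrative) that mirrors the
   option handling in MatCreate_MPIBAIJ() above:
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve
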
3675 .keywords: matrix, hashtable, factor, HT
3676 
3677 .seealso: MatSetOption()
3678 @*/
3679 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3680 {
3681   PetscErrorCode ierr;
3682 
3683   PetscFunctionBegin;
3684   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3685   PetscFunctionReturn(0);
3686 }
3687 
3688 #undef __FUNCT__
3689 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3690 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3691 {
3692   Mat_MPIBAIJ *baij;
3693 
3694   PetscFunctionBegin;
3695   baij          = (Mat_MPIBAIJ*)mat->data;
3696   baij->ht_fact = fact;
3697   PetscFunctionReturn(0);
3698 }
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3702 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3703 {
3704   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3705 
3706   PetscFunctionBegin;
3707   if (Ad)     *Ad     = a->A;
3708   if (Ao)     *Ao     = a->B;
3709   if (colmap) *colmap = a->garray;
3710   PetscFunctionReturn(0);
3711 }
3712 
3713 /*
3714     Special version for direct calls from Fortran (to eliminate two function call overheads)
3715 */
3716 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3717 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3718 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3719 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3720 #endif
3721 
3722 #undef __FUNCT__
3723 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3724 /*@C
3725   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3726 
3727   Collective on Mat
3728 
3729   Input Parameters:
3730 + mat - the matrix
3731 . min - number of input rows
3732 . im - input rows
3733 . nin - number of input columns
3734 . in - input columns
3735 . v - numerical values input
3736 - addvin - INSERT_VALUES or ADD_VALUES
3737 
3738   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3739 
3740   Level: advanced
3741 
3742 .seealso:   MatSetValuesBlocked()
3743 @*/
3744 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3745 {
3746   /* convert input arguments to C version */
3747   Mat        mat  = *matin;
3748   PetscInt   m    = *min, n = *nin;
3749   InsertMode addv = *addvin;
3750 
3751   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3752   const MatScalar *value;
3753   MatScalar       *barray     = baij->barray;
3754   PetscBool       roworiented = baij->roworiented;
3755   PetscErrorCode  ierr;
3756   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3757   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3758   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3759 
3760   PetscFunctionBegin;
3761   /* tasks normally handled by MatSetValuesBlocked() */
3762   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3763 #if defined(PETSC_USE_DEBUG)
3764   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3765   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3766 #endif
3767   if (mat->assembled) {
3768     mat->was_assembled = PETSC_TRUE;
3769     mat->assembled     = PETSC_FALSE;
3770   }
3771   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3772 
3773 
3774   if (!barray) {
3775     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3776     baij->barray = barray;
3777   }
3778 
3779   if (roworiented) stepval = (n-1)*bs;
3780   else stepval = (m-1)*bs;
3781 
3782   for (i=0; i<m; i++) {
3783     if (im[i] < 0) continue;
3784 #if defined(PETSC_USE_DEBUG)
3785     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3786 #endif
3787     if (im[i] >= rstart && im[i] < rend) {
3788       row = im[i] - rstart;
3789       for (j=0; j<n; j++) {
3790         /* If NumCol = 1 then a copy is not required */
3791         if ((roworiented) && (n == 1)) {
3792           barray = (MatScalar*)v + i*bs2;
3793         } else if ((!roworiented) && (m == 1)) {
3794           barray = (MatScalar*)v + j*bs2;
3795         } else { /* Here a copy is required */
3796           if (roworiented) {
3797             value = v + i*(stepval+bs)*bs + j*bs;
3798           } else {
3799             value = v + j*(stepval+bs)*bs + i*bs;
3800           }
3801           for (ii=0; ii<bs; ii++,value+=stepval) {
3802             for (jj=0; jj<bs; jj++) {
3803               *barray++ = *value++;
3804             }
3805           }
3806           barray -=bs2;
3807         }
3808 
3809         if (in[j] >= cstart && in[j] < cend) {
3810           col  = in[j] - cstart;
3811           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3812         } else if (in[j] < 0) continue;
3813 #if defined(PETSC_USE_DEBUG)
3814         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3815 #endif
3816         else {
3817           if (mat->was_assembled) {
3818             if (!baij->colmap) {
3819               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3820             }
3821 
3822 #if defined(PETSC_USE_DEBUG)
3823 #if defined(PETSC_USE_CTABLE)
3824             { PetscInt data;
3825               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3826               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3827             }
3828 #else
3829             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3830 #endif
3831 #endif
3832 #if defined(PETSC_USE_CTABLE)
3833             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3834             col  = (col - 1)/bs;
3835 #else
3836             col = (baij->colmap[in[j]] - 1)/bs;
3837 #endif
3838             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3839               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3840               col  =  in[j];
3841             }
3842           } else col = in[j];
3843           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3844         }
3845       }
3846     } else {
3847       if (!baij->donotstash) {
3848         if (roworiented) {
3849           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3850         } else {
3851           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3852         }
3853       }
3854     }
3855   }
3856 
3857   /* task normally handled by MatSetValuesBlocked() */
3858   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3859   PetscFunctionReturn(0);
3860 }
3861 
3862 #undef __FUNCT__
3863 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3864 /*@
3865      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in
3866          standard CSR format.
3867 
3868    Collective on MPI_Comm
3869 
3870    Input Parameters:
3871 +  comm - MPI communicator
3872 .  bs - the block size; only a block size of 1 is supported
3873 .  m - number of local rows (Cannot be PETSC_DECIDE)
3874 .  n - This value should be the same as the local size used in creating the
3875        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3876        calculated if N is given) For square matrices n is almost always m.
3877 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3878 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3879 .   i - row indices
3880 .   j - column indices
3881 -   a - matrix values
3882 
3883    Output Parameter:
3884 .   mat - the matrix
3885 
3886    Level: intermediate
3887 
3888    Notes:
3889        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3890      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3891      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3892 
3893      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3894      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3895      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3896      with column-major ordering within blocks.
3897 
3898        The i and j indices are 0 based; the entries of i are offsets into the local j array.
3899 
3900 .keywords: matrix, baij, block, compressed row, sparse, parallel
3901 
3902 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3903           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3904 @*/
3905 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3906 {
3907   PetscErrorCode ierr;
3908 
3909   PetscFunctionBegin;
3910   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3911   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3912   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3913   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3914   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3915   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3916   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3917   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3918   PetscFunctionReturn(0);
3919 }
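/*
   Illustrative usage sketch (not compiled; the variable names and values below are hypothetical).
   With bs = 1 the i/j/a arrays are ordinary CSR: on the process that owns global rows 0 and 1 of a
   4 x 4 matrix with entries (0,0)=1.0, (0,2)=2.0 and (1,1)=3.0 one might call

       PetscInt    ii[] = {0,2,3};           row offsets; ii[0] must be 0, row r has ii[r+1]-ii[r] entries
       PetscInt    jj[] = {0,2,1};           global column indices
       PetscScalar aa[] = {1.0,2.0,3.0};     matrix values
       Mat         A;

       ierr = MatCreateMPIBAIJWithArrays(comm,1,2,PETSC_DECIDE,PETSC_DETERMINE,4,ii,jj,aa,&A);CHKERRQ(ierr);

   For bs > 1 the i/j arrays hold block row/column indices and a[] holds bs*bs values per block,
   stored column-major within each block, as described in the Notes above.
*/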
3920 
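/*
    Stacks the sequential BAIJ matrices owned by the processes of comm on top of each other into one
  parallel MPIBAIJ matrix.  With MAT_INITIAL_MATRIX a symbolic phase first counts the diagonal and
  off-diagonal block nonzeros of each local block row and preallocates the result; the numeric phase
  then inserts the local rows with MatSetValues() and assembles the matrix.
*/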
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
3923 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3924 {
3925   PetscErrorCode ierr;
3926   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3927   PetscInt       *indx;
3928   PetscScalar    *values;
3929 
3930   PetscFunctionBegin;
3931   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3932   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3933     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3934     PetscInt       *dnz,*onz,sum,mbs,Nbs;
3935     PetscInt       *bindx,rmax=a->rmax,j;
3936 
3937     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3938     mbs = m/bs; Nbs = N/cbs;
3939     if (n == PETSC_DECIDE) {
3940       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3941     }
3942     /* Check sum(n) = Nbs */
3943     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3944     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);
3945 
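    /* MPI_Scan() gives the inclusive prefix sum of the local block row counts; subtracting mbs
       turns it into this process's first global block row */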
3946     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3947     rstart -= mbs;
3948 
3949     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3950     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
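    /* MatGetRow_SeqBAIJ() returns point (non-blocked) column indices, so divide by bs to recover
       the block column indices used for the block preallocation counts */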
3951     for (i=0; i<mbs; i++) {
3952       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3953       nnz = nnz/bs;
3954       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3955       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3956       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3957     }
3958     ierr = PetscFree(bindx);CHKERRQ(ierr);
3959 
3960     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3961     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3962     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3963     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
3964     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3965     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3966   }
3967 
3968   /* numeric phase */
3969   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3970   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3971 
3972   for (i=0; i<m; i++) {
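  /* insert the local rows of inmat at global rows rstart .. rstart+m-1 of the output matrix */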
3973     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3974     Ii   = i + rstart;
3975     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3976     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3977   }
3978   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982