#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

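/*
   MatGetRowMaxAbs_MPIBAIJ - For each local row, finds the entry with the
   largest absolute value.  The result combines the "diagonal" block a->A
   (whose column indices are local) with the "off-diagonal" block a->B,
   whose compressed column indices are translated back to global block
   columns through a->garray.
*/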
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap is proportional to the global number of block columns.
*/
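/*
   Illustrative sketch (not part of the library): with bs = 2 and
   garray = {3,7} (the off-process global block columns), the array variant
   below stores colmap[3] = 0*bs+1 = 1 and colmap[7] = 1*bs+1 = 3, so a
   later lookup recovers the local scalar offset as colmap[gbcol] - 1 while
   a stored zero means "this block column is not present here".  The +1
   shift is what lets 0 double as the "not present" marker; the PetscTable
   variant applies the same shift to its keys and values.
*/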
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

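/*
   The two macros below inline the insertion of a single scalar into the
   diagonal (A) or off-diagonal (B) SeqBAIJ block.  They locate the block
   column with a truncated binary search (stopping once the window holds at
   most three entries) followed by a linear scan, then address the scalar
   inside its bs x bs block as ap[bs2*_i + bs*cidx + ridx], i.e. blocks are
   stored column-major.  If the block column is absent, the row is
   reallocated (unless nonew forbids new nonzeros) and the later entries of
   the row are shifted up to make room.
*/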
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

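/*
   MatSetValues_MPIBAIJ - Scalar-indexed insertion.  Entries whose rows are
   owned by this process go straight into the diagonal block (local column)
   or the off-diagonal block (column translated through the colmap created
   above); entries for rows owned elsewhere are queued in the stash and
   communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/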
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block in this direction, the input can be used in place and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
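
/*
   Illustrative layout note (not part of the library): in the row-oriented
   case with bs = 2, m = 1 block row and n = 2 block columns, v holds the
   two 2x2 blocks interleaved by scalar rows,

       v = [ a00 a01 b00 b01   a10 a11 b10 b11 ]

   so stepval = (n-1)*bs = 2 is the gap between the end of one scalar row
   of a block and the start of that block's next scalar row, and the copy
   loop above advances by bs+stepval per row while gathering each bs x bs
   block contiguously into the scratch array barray.
*/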

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
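/*
   HASH() is multiplicative hashing: the key is scaled by the fractional
   part of the golden ratio, the integer part of the product is discarded,
   and the remaining fraction is scaled by the table size.  A worked
   example (illustrative only): with size = 97 and key = 1000,
   key*HASH_KEY = 618.0339887, the fractional part is 0.0339887, and the
   slot is (PetscInt)(97*0.0339887) = 3.  Collisions are resolved in the
   routines below by linear probing with wraparound.
*/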
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

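/*
   MatNorm_MPIBAIJ - In parallel the three supported norms reduce as
   follows: NORM_FROBENIUS sums |a_ij|^2 over both local blocks and takes
   the square root of an MPIU_SUM reduction; NORM_1 (max column sum)
   accumulates per-column absolute sums into an array of global length
   cmap->N and reduces with MPIU_SUM before taking the maximum;
   NORM_INFINITY (max row sum) forms local row sums and reduces with
   MPIU_MAX.
*/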
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be added to the
  matrix, the hash table has to be destroyed and recreated.
*/
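/*
   A minimal sketch of the probing scheme used below (assuming factor > 1,
   so the table has more slots than nonzero blocks and a free slot always
   exists); "address_of_block" stands for the pointer into a->a or b->a:

     key = row*Nbs + col + 1;                       +1 so that 0 means "empty"
     h1  = HASH(ht_size,key,tmp);
     for (k=0; k<ht_size; k++) {                    linear probing, wrapping
       if (!HT[(h1+k)%ht_size]) {
         HT[(h1+k)%ht_size] = key;
         HD[(h1+k)%ht_size] = address_of_block;
         break;
       }
     }
*/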
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

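/*
   MatAssemblyBegin_MPIBAIJ/MatAssemblyEnd_MPIBAIJ - Begin() first checks,
   with a bitwise-OR reduction, that every process is using the same insert
   mode (mixing INSERT_VALUES and ADD_VALUES across ranks is an error) and
   then starts scattering the scalar and block stashes toward the owning
   processes; End() drains the incoming stashes with MatSetValues calls,
   assembles the two sequential blocks, and installs the hash-table
   MatSetValues variants when the hash-table option (baij->ht_flag) is set.
*/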
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are using the same insert mode (INSERT_VALUES or ADD_VALUES) */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type is printed by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

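/*
   MatView_MPIBAIJ_Binary - Writes the matrix in the standard PETSc binary
   format, expanded to scalar (AIJ-style) indexing:

     header          [MAT_FILE_CLASSID, M, N, total nonzeros]
     row lengths     one PetscInt per scalar row
     column indices  nz PetscInts, global, grouped by row
     values          nz PetscScalars, matching the indices

   Process 0 gathers each piece from the other ranks in turn, throttled by
   the PetscViewerFlowControl routines so that only a bounded amount of
   remote data is resident at any time.
*/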
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for every scalar row, not just one set per block row, since process 0 does
     not have the information needed to rebuild them per row from a block row. This does require more communication, but still
     no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatMult_MPIBAIJ"
1319 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1320 {
1321   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323   PetscInt       nt;
1324 
1325   PetscFunctionBegin;
1326   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1327   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1328   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1329   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1330   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1331   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1332   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
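/*
   The product above is formed as

       yy = A_diag*xx + B_offdiag*lvec

   where lvec holds the ghost entries of xx gathered by the forward
   scatter; the scatter communication is overlapped with the
   diagonal-block multiply.
*/
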
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1339 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1340 {
1341   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1347   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1348   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1354 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1355 {
1356   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1357   PetscErrorCode ierr;
1358   PetscBool      merged;
1359 
1360   PetscFunctionBegin;
1361   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1362   /* do nondiagonal part */
1363   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1364   if (!merged) {
1365     /* send it on its way */
1366     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* receive remote parts: note this assumes the values are not actually */
1370     /* inserted in yy until the next line */
1371     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1372   } else {
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* send it on its way */
1376     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377     /* the values were actually received in the Begin() but we still need to call this no-op */
1378     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
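/*
   For the transpose products the roles of the blocks are reversed:
   lvec = B_offdiag^T*xx is computed locally and its entries are added
   into the owning processes' portions of the result by the reverse
   scatter with ADD_VALUES, while A_diag^T*xx supplies the local part.
*/
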
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1385 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1386 {
1387   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   /* do nondiagonal part */
1392   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1393   /* send it on its way */
1394   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1395   /* do local part */
1396   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1397   /* receive remote parts: note this assumes the values are not actually */
1398   /* inserted in zz until the next line, which is true for my implementation */
1399   /* but perhaps is not always true. */
1400   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 /*
1405   This only works correctly for square matrices where the subblock A->A is the
1406    diagonal block
1407 */
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1410 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1417   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatScale_MPIBAIJ"
1423 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1424 {
1425   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1430   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1436 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1437 {
1438   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1439   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1440   PetscErrorCode ierr;
1441   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1442   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1443   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1444 
1445   PetscFunctionBegin;
1446   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1447   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1448   mat->getrowactive = PETSC_TRUE;
1449 
1450   if (!mat->rowvalues && (idx || v)) {
1451     /*
1452         allocate enough space to hold information from the longest row.
1453     */
1454     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1455     PetscInt     max = 1,mbs = mat->mbs,tmp;
1456     for (i=0; i<mbs; i++) {
1457       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1458       if (max < tmp) { max = tmp; }
1459     }
1460     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1461   }
1462   lrow = row - brstart;
1463 
1464   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1465   if (!v)   {pvA = 0; pvB = 0;}
1466   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1467   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1468   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1469   nztot = nzA + nzB;
1470 
1471   cmap  = mat->garray;
1472   if (v  || idx) {
1473     if (nztot) {
1474       /* Sort by increasing column numbers, assuming A and B already sorted */
1475       PetscInt imark = -1;
1476       if (v) {
1477         *v = v_p = mat->rowvalues;
1478         for (i=0; i<nzB; i++) {
1479           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1480           else break;
1481         }
1482         imark = i;
1483         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1484         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1485       }
1486       if (idx) {
1487         *idx = idx_p = mat->rowindices;
1488         if (imark > -1) {
1489           for (i=0; i<imark; i++) {
1490             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1491           }
1492         } else {
1493           for (i=0; i<nzB; i++) {
1494             if (cmap[cworkB[i]/bs] < cstart)
1495             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1496             else break;
1497           }
1498           imark = i;
1499         }
1500         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1501         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1502       }
1503     } else {
1504       if (idx) *idx = 0;
1505       if (v)   *v   = 0;
1506     }
1507   }
1508   *nz = nztot;
1509   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1510   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1516 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1517 {
1518   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1519 
1520   PetscFunctionBegin;
1521   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1522   baij->getrowactive = PETSC_FALSE;
1523   PetscFunctionReturn(0);
1524 }
1525 
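/*
   Illustrative usage sketch (hypothetical helper, not part of the original
   source): the MatGetRow()/MatRestoreRow() pair above is used like this,
   one locally owned row at a time.
*/
static PetscErrorCode ExampleRowTraversal_BAIJ(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* cols[]/vals[] are only valid until the matching restore */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
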
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1528 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1529 {
1530   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1535   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1541 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1542 {
1543   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1544   Mat            A = a->A,B = a->B;
1545   PetscErrorCode ierr;
1546   PetscReal      isend[5],irecv[5];
1547 
1548   PetscFunctionBegin;
1549   info->block_size     = (PetscReal)matin->rmap->bs;
1550   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1551   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1552   isend[3] = info->memory;  isend[4] = info->mallocs;
1553   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1554   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1555   isend[3] += info->memory;  isend[4] += info->mallocs;
1556   if (flag == MAT_LOCAL) {
1557     info->nz_used      = isend[0];
1558     info->nz_allocated = isend[1];
1559     info->nz_unneeded  = isend[2];
1560     info->memory       = isend[3];
1561     info->mallocs      = isend[4];
1562   } else if (flag == MAT_GLOBAL_MAX) {
1563     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else if (flag == MAT_GLOBAL_SUM) {
1570     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else {
1577     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1578   }
1579   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1580   info->fill_ratio_needed = 0;
1581   info->factor_mallocs    = 0;
1582   PetscFunctionReturn(0);
1583 }
1584 
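/*
   Illustrative usage sketch (hypothetical helper, not part of the original
   source): querying the routine above through the public interface.  The
   MatInfo fields printed are the same five that are packed into
   isend[]/irecv[] for the global reductions.
*/
static PetscErrorCode ExampleReportNonzeros_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g allocated %g mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
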
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1587 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1588 {
1589   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   switch (op) {
1594   case MAT_NEW_NONZERO_LOCATIONS:
1595   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1596   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1597   case MAT_KEEP_NONZERO_PATTERN:
1598   case MAT_NEW_NONZERO_LOCATION_ERR:
1599     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1600     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1601     break;
1602   case MAT_ROW_ORIENTED:
1603     a->roworiented = flg;
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_NEW_DIAGONALS:
1608     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1609     break;
1610   case MAT_IGNORE_OFF_PROC_ENTRIES:
1611     a->donotstash = flg;
1612     break;
1613   case MAT_USE_HASH_TABLE:
1614     a->ht_flag = flg;
1615     break;
1616   case MAT_SYMMETRIC:
1617   case MAT_STRUCTURALLY_SYMMETRIC:
1618   case MAT_HERMITIAN:
1619   case MAT_SYMMETRY_ETERNAL:
1620     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1621     break;
1622   default:
1623     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
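/*
   Illustrative usage sketch (hypothetical fragment, not part of the
   original source): the options handled above are normally set before
   values are inserted, e.g. to enable the hash-table insertion path and
   to drop entries destined for other processes.
*/
static PetscErrorCode ExampleSetOptions_BAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
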
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1633   Mat_SeqBAIJ    *Aloc;
1634   Mat            B;
1635   PetscErrorCode ierr;
1636   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1637   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1638   MatScalar      *a;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1643     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1644     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1645     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1646     /* Do not know preallocation information, but must set block size */
1647     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1648   } else {
1649     B = *matout;
1650   }
1651 
1652   /* copy over the A part */
1653   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1654   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1655   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1656 
1657   for (i=0; i<mbs; i++) {
1658     rvals[0] = bs*(baij->rstartbs + i);
1659     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1660     for (j=ai[i]; j<ai[i+1]; j++) {
1661       col = (baij->cstartbs+aj[j])*bs;
1662       for (k=0; k<bs; k++) {
1663         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   /* copy over the B part */
1669   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1670   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1671   for (i=0; i<mbs; i++) {
1672     rvals[0] = bs*(baij->rstartbs + i);
1673     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1674     for (j=ai[i]; j<ai[i+1]; j++) {
1675       col = baij->garray[aj[j]]*bs;
1676       for (k=0; k<bs; k++) {
1677         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1678         col++; a += bs;
1679       }
1680     }
1681   }
1682   ierr = PetscFree(rvals);CHKERRQ(ierr);
1683   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685 
1686   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1687     *matout = B;
1688   } else {
1689     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
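/*
   Illustrative usage sketch (hypothetical helper, not part of the original
   source): both calling modes of the routine above.  MAT_REUSE_MATRIX with
   the same matrix passed back is the in-place path, which the code above
   only allows for square matrices.
*/
static PetscErrorCode ExampleTranspose_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place, square only */
  PetscFunctionReturn(0);
}
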
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1697 {
1698   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1699   Mat            a = baij->A,b = baij->B;
1700   PetscErrorCode ierr;
1701   PetscInt       s1,s2,s3;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1705   if (rr) {
1706     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1707     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1708     /* Overlap communication with computation. */
1709     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   }
1711   if (ll) {
1712     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1713     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1714     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1715   }
1716   /* scale the diagonal block */
1717   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1718 
1719   if (rr) {
1720     /* Do a scatter end and then right scale the off-diagonal block */
1721     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1722     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1723   }
1724 
1725   PetscFunctionReturn(0);
1726 }
1727 
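/*
   The routine above computes mat = diag(ll)*mat*diag(rr).  The left
   scaling ll is purely local and is applied to both blocks directly,
   while the right scaling rr must first be scattered into lvec so that
   its ghost entries can scale the columns of the off-diagonal block.
*/
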
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1730 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1731 {
1732   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1733   PetscErrorCode    ierr;
1734   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1735   PetscInt          i,*owners = A->rmap->range;
1736   PetscInt          *nprocs,j,idx,nsends,row;
1737   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1738   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1739   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1740   MPI_Comm          comm = ((PetscObject)A)->comm;
1741   MPI_Request       *send_waits,*recv_waits;
1742   MPI_Status        recv_status,*send_status;
1743   const PetscScalar *xx;
1744   PetscScalar       *bb;
1745 #if defined(PETSC_DEBUG)
1746   PetscBool         found = PETSC_FALSE;
1747 #endif
1748 
1749   PetscFunctionBegin;
1750   /*  first count number of contributors to each processor */
1751   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1752   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1753   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1754   j = 0;
1755   for (i=0; i<N; i++) {
1756     if (lastidx > (idx = rows[i])) j = 0;
1757     lastidx = idx;
1758     for (; j<size; j++) {
1759       if (idx >= owners[j] && idx < owners[j+1]) {
1760         nprocs[2*j]++;
1761         nprocs[2*j+1] = 1;
1762         owner[i] = j;
1763 #if defined(PETSC_DEBUG)
1764         found = PETSC_TRUE;
1765 #endif
1766         break;
1767       }
1768     }
1769 #if defined(PETSC_DEBUG)
1770     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1771     found = PETSC_FALSE;
1772 #endif
1773   }
1774   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1775 
1776   if (A->nooffproczerorows) {
1777     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1778     nrecvs = nsends;
1779     nmax   = N;
1780   } else {
1781     /* inform other processors of number of messages and max length */
1782     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1783   }
1784 
1785   /* post receives:   */
1786   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1787   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1788   for (i=0; i<nrecvs; i++) {
1789     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1790   }
1791 
1792   /* do sends:
1793      1) starts[i] gives the starting index in svalues for stuff going to
1794      the ith processor
1795   */
1796   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1797   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1798   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1799   starts[0]  = 0;
1800   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1801   for (i=0; i<N; i++) {
1802     svalues[starts[owner[i]]++] = rows[i];
1803   }
1804 
1805   starts[0] = 0;
1806   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1807   count = 0;
1808   for (i=0; i<size; i++) {
1809     if (nprocs[2*i+1]) {
1810       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1811     }
1812   }
1813   ierr = PetscFree(starts);CHKERRQ(ierr);
1814 
1815   base = owners[rank];
1816 
1817   /*  wait on receives */
1818   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1819   count  = nrecvs;
1820   slen = 0;
1821   while (count) {
1822     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1823     /* unpack receives into our local space */
1824     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1825     source[imdex]  = recv_status.MPI_SOURCE;
1826     lens[imdex]    = n;
1827     slen          += n;
1828     count--;
1829   }
1830   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1831 
1832   /* move the data into the send scatter */
1833   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1834   count = 0;
1835   for (i=0; i<nrecvs; i++) {
1836     values = rvalues + i*nmax;
1837     for (j=0; j<lens[i]; j++) {
1838       lrows[count++] = values[j] - base;
1839     }
1840   }
1841   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1842   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1843   ierr = PetscFree(owner);CHKERRQ(ierr);
1844   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1845 
1846   /* fix right hand side if needed */
1847   if (x && b) {
1848     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1849     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1850     for (i=0; i<slen; i++) {
1851       bb[lrows[i]] = diag*xx[lrows[i]];
1852     }
1853     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1854     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1855   }
1856 
1857   /* actually zap the local rows */
1858   /*
1859      Zero the required rows. If the "diagonal block" of the matrix
1860      is square and the user wishes to set the diagonal, we use separate
1861      code so that MatSetValues() is not called for each diagonal entry,
1862      which would allocate new memory, causing lots of mallocs and
1863      slowing things down.
1864   */
1865   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1866   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1867   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1868     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1869   } else if (diag != 0.0) {
1870     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1871     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1872        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1873     for (i=0; i<slen; i++) {
1874       row  = lrows[i] + rstart_bs;
1875       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1876     }
1877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879   } else {
1880     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1881   }
1882 
1883   ierr = PetscFree(lrows);CHKERRQ(ierr);
1884 
1885   /* wait on sends */
1886   if (nsends) {
1887     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1888     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1889     ierr = PetscFree(send_status);CHKERRQ(ierr);
1890   }
1891   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1892   ierr = PetscFree(svalues);CHKERRQ(ierr);
1893 
1894   PetscFunctionReturn(0);
1895 }
1896 
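/*
   Illustrative usage sketch (hypothetical helper, not part of the original
   source): the classic use of the routine above to impose Dirichlet
   conditions on A x = b.  rows[] holds global point (not block) indices
   and may include rows owned by other processes; passing x and b makes
   the routine set b = diag*x at those rows.
*/
static PetscErrorCode ExampleDirichletRows_BAIJ(Mat A,PetscInt n,const PetscInt rows[],Vec x,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,n,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
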
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1900 {
1901   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatEqual_MPIBAIJ"
1913 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1914 {
1915   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1916   Mat            a,b,c,d;
1917   PetscBool      flg;
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   a = matA->A; b = matA->B;
1922   c = matB->A; d = matB->B;
1923 
1924   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1925   if (flg) {
1926     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1927   }
1928   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatCopy_MPIBAIJ"
1934 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1935 {
1936   PetscErrorCode ierr;
1937   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1938   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1939 
1940   PetscFunctionBegin;
1941   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1942   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1943     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1944   } else {
1945     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1946     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1953 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1954 {
1955   PetscErrorCode ierr;
1956 
1957   PetscFunctionBegin;
1958   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1964 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1965 {
1966   PetscErrorCode ierr;
1967   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1968   PetscBLASInt   bnz,one=1;
1969   Mat_SeqBAIJ    *x,*y;
1970 
1971   PetscFunctionBegin;
1972   if (str == SAME_NONZERO_PATTERN) {
1973     PetscScalar alpha = a;
1974     x = (Mat_SeqBAIJ *)xx->A->data;
1975     y = (Mat_SeqBAIJ *)yy->A->data;
1976     bnz = PetscBLASIntCast(x->nz);
1977     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1978     x = (Mat_SeqBAIJ *)xx->B->data;
1979     y = (Mat_SeqBAIJ *)yy->B->data;
1980     bnz = PetscBLASIntCast(x->nz);
1981     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1982   } else {
1983     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
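/*
   The fast path above applies Y <- a*X + Y with one BLAS axpy on the raw
   value arrays of each block, which is valid only when X and Y have
   identical nonzero patterns, i.e. MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);
   any other MatStructure falls back to the general MatAXPY_Basic().
*/
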
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1990 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1991 {
1992   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1993   PetscErrorCode ierr;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1997   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2003 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2004 {
2005   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2010   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2016 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2017 {
2018   PetscErrorCode ierr;
2019   IS             iscol_local;
2020   PetscInt       csize;
2021 
2022   PetscFunctionBegin;
2023   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2024   if (call == MAT_REUSE_MATRIX) {
2025     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2026     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2027   } else {
2028     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2029   }
2030   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2031   if (call == MAT_INITIAL_MATRIX) {
2032     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2033     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2034   }
2035   PetscFunctionReturn(0);
2036 }
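
/*
   Illustrative usage sketch (hypothetical fragment, not part of the
   original source): the MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX protocol of
   the routine above.  The gathered column IS is stashed on the submatrix
   under "ISAllGather", which is why the reuse call is legal only on a
   submatrix produced by an earlier initial call with the same index sets.
*/
static PetscErrorCode ExampleSubMatrixReuse_BAIJ(Mat A,IS isrow,IS iscol)
{
  PetscErrorCode ierr;
  Mat            S;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  /* ... A changes numerically but keeps its nonzero pattern ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
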
2037 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2040 /*
2041   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2042   locally, and then the end result by concatenating the local matrices.
2043   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2044 */
2045 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2046 {
2047   PetscErrorCode ierr;
2048   PetscMPIInt    rank,size;
2049   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2050   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2051   Mat            M,Mreuse;
2052   MatScalar      *vwork,*aa;
2053   MPI_Comm       comm = ((PetscObject)mat)->comm;
2054   IS             isrow_new, iscol_new;
2055   PetscBool      idflag,allrows, allcols;
2056   Mat_SeqBAIJ    *aij;
2057 
2058 
2059   PetscFunctionBegin;
2060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2062   /* The compression and expansion should be avoided. It does not point
2063      out errors and it might change the indices, hence it is buggy */
2064   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2065   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2066 
2067   /* Check for special case: each processor gets entire matrix columns */
2068   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2069   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2070   if (idflag && ncol == mat->cmap->N) {
2071     allcols = PETSC_TRUE;
2072   } else {
2073     allcols = PETSC_FALSE;
2074   }
2075 
2076   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2077   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2078   if (idflag && nrow == mat->rmap->N) {
2079     allrows = PETSC_TRUE;
2080   } else {
2081     allrows = PETSC_FALSE;
2082   }
2083   if (call ==  MAT_REUSE_MATRIX) {
2084     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2085     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2086     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2087   } else {
2088     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2089   }
2090   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2091   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2092   /*
2093       m - number of local rows
2094       n - number of columns (same on all processors)
2095       rstart - first row in new global matrix generated
2096   */
2097   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2098   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2099   m    = m/bs;
2100   n    = n/bs;
2101 
2102   if (call == MAT_INITIAL_MATRIX) {
2103     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2104     ii  = aij->i;
2105     jj  = aij->j;
2106 
2107     /*
2108         Determine the number of non-zeros in the diagonal and off-diagonal
2109         portions of the matrix in order to do correct preallocation
2110     */
2111 
2112     /* first get start and end of "diagonal" columns */
2113     if (csize == PETSC_DECIDE) {
2114       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2115       if (mglobal == n*bs) { /* square matrix */
2116         nlocal = m;
2117       } else {
2118         nlocal = n/size + ((n % size) > rank);
2119       }
2120     } else {
2121       nlocal = csize/bs;
2122     }
2123     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2124     rstart = rend - nlocal;
2125     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2126 
2127     /* next, compute all the lengths */
2128     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2129     olens = dlens + m;
2130     for (i=0; i<m; i++) {
2131       jend = ii[i+1] - ii[i];
2132       olen = 0;
2133       dlen = 0;
2134       for (j=0; j<jend; j++) {
2135         if (*jj < rstart || *jj >= rend) olen++;
2136         else dlen++;
2137         jj++;
2138       }
2139       olens[i] = olen;
2140       dlens[i] = dlen;
2141     }
2142     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2143     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2144     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2145     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2146     ierr = PetscFree(dlens);CHKERRQ(ierr);
2147   } else {
2148     PetscInt ml,nl;
2149 
2150     M = *newmat;
2151     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2152     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2153     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2154     /*
2155          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2156        rather than the slower MatSetValues().
2157     */
2158     M->was_assembled = PETSC_TRUE;
2159     M->assembled     = PETSC_FALSE;
2160   }
2161   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2162   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2163   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2164   ii  = aij->i;
2165   jj  = aij->j;
2166   aa  = aij->a;
2167   for (i=0; i<m; i++) {
2168     row   = rstart/bs + i;
2169     nz    = ii[i+1] - ii[i];
2170     cwork = jj;     jj += nz;
2171     vwork = aa;     aa += nz*bs*bs;
2172     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2173   }
2174 
2175   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   *newmat = M;
2178 
2179   /* save submatrix used in processor for next request */
2180   if (call ==  MAT_INITIAL_MATRIX) {
2181     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2182     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2183   }
2184 
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatPermute_MPIBAIJ"
2190 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2191 {
2192   MPI_Comm       comm,pcomm;
2193   PetscInt       first,rlocal_size,clocal_size,nrows;
2194   const PetscInt *rows;
2195   PetscMPIInt    size;
2196   IS             crowp,growp,irowp,lrowp,lcolp;
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2201   /* make a collective version of 'rowp' */
2202   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2203   if (pcomm==comm) {
2204     crowp = rowp;
2205   } else {
2206     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2207     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2208     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2209     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2210   }
2211   /* collect the global row permutation and invert it */
2212   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2213   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2214   if (pcomm!=comm) {
2215     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2216   }
2217   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2218   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2219   /* get the local target indices */
2220   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2221   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2222   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2223   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2224   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2225   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2226   /* the column permutation is so much easier;
2227      make a local version of 'colp' and invert it */
2228   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2229   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2230   if (size==1) {
2231     lcolp = colp;
2232   } else {
2233     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2234   }
2235   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2236   /* now we just get the submatrix */
2237   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2238   if (size>1) {
2239     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2240   }
2241   /* clean up */
2242   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2248 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2249 {
2250   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2251   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2252 
2253   PetscFunctionBegin;
2254   if (nghosts) { *nghosts = B->nbs;}
2255   if (ghosts) {*ghosts = baij->garray;}
2256   PetscFunctionReturn(0);
2257 }
2258 
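/*
   Illustrative usage sketch (hypothetical helper, not part of the original
   source): the ghost list returned above (block indices taken from garray)
   can be used to build a ghosted work vector compatible with the matrix's
   off-process coupling.
*/
static PetscErrorCode ExampleGhostVector_BAIJ(Mat A,Vec *w)
{
  PetscErrorCode ierr;
  PetscInt       bs,nghost,nlocal;
  const PetscInt *ghosts;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);      /* block indices */
  ierr = MatGetLocalSize(A,&nlocal,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateGhostBlock(((PetscObject)A)->comm,bs,nlocal,PETSC_DECIDE,nghost,ghosts,w);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
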
2259 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2263 /*
2264     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2265 */
2266 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2267 {
2268   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2269   PetscErrorCode        ierr;
2270   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2271   PetscInt              bs,i,n,nrows,j,k,m,ncols,col;
2272   const PetscInt        *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2273   PetscInt              nis = iscoloring->n,nctot,*cols;
2274   PetscInt              *rowhit,M,cstart,cend,colb;
2275   PetscInt              *columnsforrow,l;
2276   IS                    *isa;
2277   PetscBool              done,flg;
2278   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2279   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2280 
2281   PetscFunctionBegin;
2282   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2283   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2284 
2285   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2286   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2287   M                = mat->rmap->n/bs;
2288   cstart           = mat->cmap->rstart/bs;
2289   cend             = mat->cmap->rend/bs;
2290   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2291   c->N             = mat->cmap->N/bs;
2292   c->m             = mat->rmap->n/bs;
2293   c->rstart        = mat->rmap->rstart/bs;
2294 
2295   c->ncolors       = nis;
2296   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2297   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2298   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2299   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2300   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2301   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2302 
2303   /* Allow access to data structures of local part of matrix */
2304   if (!baij->colmap) {
2305     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2306   }
2307   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2308   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2309 
2310   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2311   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2312 
2313   for (i=0; i<nis; i++) {
2314     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2315     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2316     c->ncolumns[i] = n;
2317     if (n) {
2318       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2319       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2320       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2321     } else {
2322       c->columns[i]  = 0;
2323     }
2324 
2325     if (ctype == IS_COLORING_GLOBAL) {
2326       /* Determine the total (parallel) number of columns of this color */
2327       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2328       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2329 
2330       nn   = PetscMPIIntCast(n);
2331       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2332       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2333       if (!nctot) {
2334         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2335       }
2336 
2337       disp[0] = 0;
2338       for (j=1; j<size; j++) {
2339         disp[j] = disp[j-1] + ncolsonproc[j-1];
2340       }
2341 
2342       /* Get complete list of columns for color on each processor */
2343       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2344       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2345       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2346     } else if (ctype == IS_COLORING_GHOSTED) {
2347       /* Determine local number of columns of this color on this process, including ghost points */
2348       nctot = n;
2349       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2350       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2351     } else {
2352       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2353     }
2354 
2355     /*
2356        Mark all rows affected by these columns
2357     */
2358     /* Temporary option to allow for debugging/testing */
2359     flg  = PETSC_FALSE;
2360     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2361     if (!flg) {/*-----------------------------------------------------------------------------*/
2362       /* crude, fast version */
2363       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2364       /* loop over columns */
2365       for (j=0; j<nctot; j++) {
2366         if (ctype == IS_COLORING_GHOSTED) {
2367           col = ltog[cols[j]];
2368         } else {
2369           col  = cols[j];
2370         }
2371         if (col >= cstart && col < cend) {
2372           /* column is in diagonal block of matrix */
2373           rows = A_cj + A_ci[col-cstart];
2374           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2375         } else {
2376 #if defined (PETSC_USE_CTABLE)
2377           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2378           colb --;
2379 #else
2380           colb = baij->colmap[col] - 1;
2381 #endif
2382           if (colb == -1) {
2383             m = 0;
2384           } else {
2385             colb = colb/bs;
2386             rows = B_cj + B_ci[colb];
2387             m    = B_ci[colb+1] - B_ci[colb];
2388           }
2389         }
2390         /* loop over the rows containing this column, marking them in rowhit */
2391         for (k=0; k<m; k++) {
2392           rowhit[*rows++] = col + 1;
2393         }
2394       }
2395 
2396       /* count the number of hits */
2397       nrows = 0;
2398       for (j=0; j<M; j++) {
2399         if (rowhit[j]) nrows++;
2400       }
2401       c->nrows[i]         = nrows;
2402       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2403       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2404       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2405       nrows = 0;
2406       for (j=0; j<M; j++) {
2407         if (rowhit[j]) {
2408           c->rows[i][nrows]           = j;
2409           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2410           nrows++;
2411         }
2412       }
2413     } else {/*-------------------------------------------------------------------------------*/
2414       /* slow version, using rowhit as a linked list */
2415       PetscInt currentcol,fm,mfm;
2416       rowhit[M] = M;
2417       nrows     = 0;
2418       /* loop over columns */
2419       for (j=0; j<nctot; j++) {
2420         if (ctype == IS_COLORING_GHOSTED) {
2421           col = ltog[cols[j]];
2422         } else {
2423           col  = cols[j];
2424         }
2425         if (col >= cstart && col < cend) {
2426           /* column is in diagonal block of matrix */
2427           rows = A_cj + A_ci[col-cstart];
2428           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2429         } else {
2430 #if defined (PETSC_USE_CTABLE)
2431           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2432           colb --;
2433 #else
2434           colb = baij->colmap[col] - 1;
2435 #endif
2436           if (colb == -1) {
2437             m = 0;
2438           } else {
2439             colb = colb/bs;
2440             rows = B_cj + B_ci[colb];
2441             m    = B_ci[colb+1] - B_ci[colb];
2442           }
2443         }
2444 
2445         /* loop over the rows containing this column, marking them in rowhit */
2446         fm    = M; /* fm points to first entry in linked list */
2447         for (k=0; k<m; k++) {
2448           currentcol = *rows++;
2449           /* is it already in the list? */
2450           do {
2451             mfm  = fm;
2452             fm   = rowhit[fm];
2453           } while (fm < currentcol);
2454           /* not in list so add it */
2455           if (fm != currentcol) {
2456             nrows++;
2457             columnsforrow[currentcol] = col;
2458             /* next three lines insert new entry into linked list */
2459             rowhit[mfm]               = currentcol;
2460             rowhit[currentcol]        = fm;
2461             fm                        = currentcol;
2462             /* fm points to present position in list since we know the columns are sorted */
2463           } else {
2464             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2465           }
2466         }
2467       }
2468       c->nrows[i]         = nrows;
2469       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2470       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2471       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2472       /* now store the linked list of rows into c->rows[i] */
2473       nrows = 0;
2474       fm    = rowhit[M];
2475       do {
2476         c->rows[i][nrows]            = fm;
2477         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2478         fm                           = rowhit[fm];
2479       } while (fm < M);
2480     } /* ---------------------------------------------------------------------------------------*/
2481     ierr = PetscFree(cols);CHKERRQ(ierr);
2482   }
2483 
2484   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2485   /*
2486        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2487   */
2488   if (ctype == IS_COLORING_GLOBAL) {
2489     PetscInt *garray;
2490     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2491     for (i=0; i<baij->B->cmap->n/bs; i++) {
2492       for (j=0; j<bs; j++) {
2493         garray[i*bs+j] = bs*baij->garray[i]+j;
2494       }
2495     }
2496     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2497     ierr = PetscFree(garray);CHKERRQ(ierr);
2498     CHKMEMQ;
2499     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2500     for (k=0; k<c->ncolors; k++) {
2501       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2502       for (l=0; l<c->nrows[k]; l++) {
2503         col = c->columnsforrow[k][l];
2504         if (col >= cstart && col < cend) {
2505           /* column is in diagonal block of matrix */
2506           colb = col - cstart;
2507         } else {
2508           /* column  is in "off-processor" part */
2509 #if defined (PETSC_USE_CTABLE)
2510           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2511           colb --;
2512 #else
2513           colb = baij->colmap[col] - 1;
2514 #endif
2515           colb = colb/bs;
2516           colb += cend - cstart;
2517         }
2518         c->vscaleforrow[k][l] = colb;
2519       }
2520     }
2521   } else if (ctype == IS_COLORING_GHOSTED) {
2522     /* Get gtol mapping */
2523     PetscInt N = mat->cmap->N, *gtol;
2524     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2525     for (i=0; i<N; i++) gtol[i] = -1;
2526     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2527 
2528     c->vscale = 0; /* will be created in MatFDColoringApply() */
2529     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2530     for (k=0; k<c->ncolors; k++) {
2531       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2532       for (l=0; l<c->nrows[k]; l++) {
2533         col = c->columnsforrow[k][l];      /* global column index */
2534         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2535       }
2536     }
2537     ierr = PetscFree(gtol);CHKERRQ(ierr);
2538   }
2539   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2540 
2541   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2542   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2543   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2544   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2545   CHKMEMQ;
2546   PetscFunctionReturn(0);
2547 }
2548 
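/*
   Implementation note for the routine above: in the fast path rowhit[] is
   a dense flag array storing col+1, so that 0 can mean "row not hit by
   this color"; in the slow (-matfdcoloring_slow) path the same array is
   threaded as a sorted linked list anchored at entry M, which also detects
   two columns of one color hitting the same row (an invalid coloring).
*/
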
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2551 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2552 {
2553   Mat            B;
2554   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2555   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2556   Mat_SeqAIJ     *b;
2557   PetscErrorCode ierr;
2558   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2559   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2560   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2561 
2562   PetscFunctionBegin;
2563   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2564   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2565 
2566   /* ----------------------------------------------------------------
2567      Tell every processor the number of nonzeros per row
2568   */
2569   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2570   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2571     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2572   }
2573   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2574   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2575   displs     = recvcounts + size;
2576   for (i=0; i<size; i++) {
2577     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2578     displs[i]     = A->rmap->range[i]/bs;
2579   }
2580 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2581   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2582 #else
2583   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2584 #endif
2585   /* ---------------------------------------------------------------
2586      Create the sequential matrix of the same type as the local block diagonal
2587   */
2588   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2589   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2590   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2591   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2592   b = (Mat_SeqAIJ *)B->data;
2593 
2594   /*--------------------------------------------------------------------
2595     Copy my part of matrix column indices over
2596   */
2597   sendcount  = ad->nz + bd->nz;
2598   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2599   a_jsendbuf = ad->j;
2600   b_jsendbuf = bd->j;
2601   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2602   cnt        = 0;
2603   for (i=0; i<n; i++) {
2604 
2605     /* put in lower diagonal portion */
2606     m = bd->i[i+1] - bd->i[i];
2607     while (m > 0) {
2608       /* is it above diagonal (in bd (compressed) numbering) */
2609       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2610       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2611       m--;
2612     }
2613 
2614     /* put in diagonal portion */
2615     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2616       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2617     }
2618 
2619     /* put in upper diagonal portion */
2620     while (m-- > 0) {
2621       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2622     }
2623   }
2624   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2625 
2626   /*--------------------------------------------------------------------
2627     Gather all column indices to all processors
2628   */
2629   for (i=0; i<size; i++) {
2630     recvcounts[i] = 0;
2631     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2632       recvcounts[i] += lens[j];
2633     }
2634   }
2635   displs[0]  = 0;
2636   for (i=1; i<size; i++) {
2637     displs[i] = displs[i-1] + recvcounts[i-1];
2638   }
2639 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2640   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2641 #else
2642   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2643 #endif
2644   /*--------------------------------------------------------------------
2645     Assemble the matrix into usable form (note numerical values not yet set)
2646   */
2647   /* set the b->ilen (length of each row) values */
2648   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2649   /* set the b->i indices */
2650   b->i[0] = 0;
2651   for (i=1; i<=A->rmap->N/bs; i++) {
2652     b->i[i] = b->i[i-1] + lens[i-1];
2653   }
2654   ierr = PetscFree(lens);CHKERRQ(ierr);
2655   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2656   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2657   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2658 
2659   if (A->symmetric) {
2660     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2661   } else if (A->hermitian) {
2662     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2663   } else if (A->structurally_symmetric) {
2664     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2665   }
2666   *newmat = B;
2667   PetscFunctionReturn(0);
2668 }
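/*
   Usage sketch (not part of this file): the routine above is registered as
   MatGetSeqNonzeroStructure_MPIBAIJ in the function table below and is normally
   reached through the public MatGetSeqNonzeroStructure() interface; every process
   receives the complete block nonzero structure as a sequential AIJ matrix that
   the caller must destroy.
.vb
     Mat            Bseq;
     PetscErrorCode ierr;
     ierr = MatGetSeqNonzeroStructure(A,&Bseq);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
*/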
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatSOR_MPIBAIJ"
2672 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2673 {
2674   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2675   PetscErrorCode ierr;
2676   Vec            bb1 = 0;
2677 
2678   PetscFunctionBegin;
2679   if (flag == SOR_APPLY_UPPER) {
2680     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2681     PetscFunctionReturn(0);
2682   }
2683 
2684   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2685     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2686   }
2687 
2688   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2689     if (flag & SOR_ZERO_INITIAL_GUESS) {
2690       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2691       its--;
2692     }
2693 
2694     while (its--) {
2695       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2696       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697 
2698       /* update rhs: bb1 = bb - B*x */
2699       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2700       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2701 
2702       /* local sweep */
2703       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2704     }
2705   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2706     if (flag & SOR_ZERO_INITIAL_GUESS) {
2707       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2708       its--;
2709     }
2710     while (its--) {
2711       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2712       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2713 
2714       /* update rhs: bb1 = bb - B*x */
2715       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2716       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2717 
2718       /* local sweep */
2719       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2720     }
2721   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2722     if (flag & SOR_ZERO_INITIAL_GUESS) {
2723       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2724       its--;
2725     }
2726     while (its--) {
2727       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2728       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2729 
2730       /* update rhs: bb1 = bb - B*x */
2731       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2732       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2733 
2734       /* local sweep */
2735       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2736     }
2737   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2738 
2739   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
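/*
   Usage sketch through the public MatSOR() interface (A is assumed to be an
   assembled MPIBAIJ matrix; x and b are conforming vectors): one local symmetric
   sweep with a zero initial guess, omega = 1.0, no diagonal shift, its = lits = 1.
.vb
     Vec            x,b;
     PetscErrorCode ierr;
     ierr = MatGetVecs(A,&x,&b);CHKERRQ(ierr);
     ierr = VecSet(b,1.0);CHKERRQ(ierr);
     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
.ve
   Between outer iterations the routine above refreshes the ghost values through
   Mvctx and updates the right-hand side as bb1 = bb - B*x before the next local sweep.
*/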
2742 
2743 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2744 
2745 #undef __FUNCT__
2746 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2747 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2748 {
2749   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
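/*
   Usage sketch: the inverted diagonal blocks of the local diagonal part are
   obtained through the public MatInvertBlockDiagonal() interface; the array is
   owned by the matrix (hence the const pointer) and must not be freed.
.vb
     const PetscScalar *idiag;
     PetscErrorCode    ierr;
     ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
.ve
*/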
2756 
2757 
2758 /* -------------------------------------------------------------------*/
2759 static struct _MatOps MatOps_Values = {
2760        MatSetValues_MPIBAIJ,
2761        MatGetRow_MPIBAIJ,
2762        MatRestoreRow_MPIBAIJ,
2763        MatMult_MPIBAIJ,
2764 /* 4*/ MatMultAdd_MPIBAIJ,
2765        MatMultTranspose_MPIBAIJ,
2766        MatMultTransposeAdd_MPIBAIJ,
2767        0,
2768        0,
2769        0,
2770 /*10*/ 0,
2771        0,
2772        0,
2773        MatSOR_MPIBAIJ,
2774        MatTranspose_MPIBAIJ,
2775 /*15*/ MatGetInfo_MPIBAIJ,
2776        MatEqual_MPIBAIJ,
2777        MatGetDiagonal_MPIBAIJ,
2778        MatDiagonalScale_MPIBAIJ,
2779        MatNorm_MPIBAIJ,
2780 /*20*/ MatAssemblyBegin_MPIBAIJ,
2781        MatAssemblyEnd_MPIBAIJ,
2782        MatSetOption_MPIBAIJ,
2783        MatZeroEntries_MPIBAIJ,
2784 /*24*/ MatZeroRows_MPIBAIJ,
2785        0,
2786        0,
2787        0,
2788        0,
2789 /*29*/ MatSetUp_MPIBAIJ,
2790        0,
2791        0,
2792        0,
2793        0,
2794 /*34*/ MatDuplicate_MPIBAIJ,
2795        0,
2796        0,
2797        0,
2798        0,
2799 /*39*/ MatAXPY_MPIBAIJ,
2800        MatGetSubMatrices_MPIBAIJ,
2801        MatIncreaseOverlap_MPIBAIJ,
2802        MatGetValues_MPIBAIJ,
2803        MatCopy_MPIBAIJ,
2804 /*44*/ 0,
2805        MatScale_MPIBAIJ,
2806        0,
2807        0,
2808        0,
2809 /*49*/ 0,
2810        0,
2811        0,
2812        0,
2813        0,
2814 /*54*/ MatFDColoringCreate_MPIBAIJ,
2815        0,
2816        MatSetUnfactored_MPIBAIJ,
2817        MatPermute_MPIBAIJ,
2818        MatSetValuesBlocked_MPIBAIJ,
2819 /*59*/ MatGetSubMatrix_MPIBAIJ,
2820        MatDestroy_MPIBAIJ,
2821        MatView_MPIBAIJ,
2822        0,
2823        0,
2824 /*64*/ 0,
2825        0,
2826        0,
2827        0,
2828        0,
2829 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2830        0,
2831        0,
2832        0,
2833        0,
2834 /*74*/ 0,
2835        MatFDColoringApply_BAIJ,
2836        0,
2837        0,
2838        0,
2839 /*79*/ 0,
2840        0,
2841        0,
2842        0,
2843        MatLoad_MPIBAIJ,
2844 /*84*/ 0,
2845        0,
2846        0,
2847        0,
2848        0,
2849 /*89*/ 0,
2850        0,
2851        0,
2852        0,
2853        0,
2854 /*94*/ 0,
2855        0,
2856        0,
2857        0,
2858        0,
2859 /*99*/ 0,
2860        0,
2861        0,
2862        0,
2863        0,
2864 /*104*/0,
2865        MatRealPart_MPIBAIJ,
2866        MatImaginaryPart_MPIBAIJ,
2867        0,
2868        0,
2869 /*109*/0,
2870        0,
2871        0,
2872        0,
2873        0,
2874 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2875        0,
2876        MatGetGhosts_MPIBAIJ,
2877        0,
2878        0,
2879 /*119*/0,
2880        0,
2881        0,
2882        0,
2883        0,
2884 /*124*/0,
2885        0,
2886        MatInvertBlockDiagonal_MPIBAIJ
2887 };
2888 
2889 EXTERN_C_BEGIN
2890 #undef __FUNCT__
2891 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2892 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2893 {
2894   PetscFunctionBegin;
2895   *a = ((Mat_MPIBAIJ *)A->data)->A;
2896   PetscFunctionReturn(0);
2897 }
2898 EXTERN_C_END
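/*
   Usage sketch: the diagonal block is obtained through the public
   MatGetDiagonalBlock() interface. As the implementation above shows, it is the
   internal SEQBAIJ matrix itself (a borrowed reference), so the caller must not
   destroy it.
.vb
     Mat            Ad;
     PetscErrorCode ierr;
     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
.ve
*/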
2899 
2900 EXTERN_C_BEGIN
2901 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2902 EXTERN_C_END
2903 
2904 EXTERN_C_BEGIN
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2907 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2908 {
2909   PetscInt       m,rstart,cstart,cend;
2910   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2911   const PetscInt *JJ=0;
2912   PetscScalar    *values=0;
2913   PetscErrorCode ierr;
2914 
2915   PetscFunctionBegin;
2916   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2917   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2918   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2919   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2920   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2921   m      = B->rmap->n/bs;
2922   rstart = B->rmap->rstart/bs;
2923   cstart = B->cmap->rstart/bs;
2924   cend   = B->cmap->rend/bs;
2925 
2926   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2927   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2928   for (i=0; i<m; i++) {
2929     nz = ii[i+1] - ii[i];
2930     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2931     nz_max = PetscMax(nz_max,nz);
2932     JJ  = jj + ii[i];
2933     for (j=0; j<nz; j++) {
2934       if (*JJ >= cstart) break;
2935       JJ++;
2936     }
2937     d = 0;
2938     for (; j<nz; j++) {
2939       if (*JJ++ >= cend) break;
2940       d++;
2941     }
2942     d_nnz[i] = d;
2943     o_nnz[i] = nz - d;
2944   }
2945   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2946   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2947 
2948   values = (PetscScalar*)V;
2949   if (!values) {
2950     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2951     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2952   }
2953   for (i=0; i<m; i++) {
2954     PetscInt          row    = i + rstart;
2955     PetscInt          ncols  = ii[i+1] - ii[i];
2956     const PetscInt    *icols = jj + ii[i];
2957     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2958     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2959   }
2960 
2961   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2962   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2963   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2964   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 EXTERN_C_END
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2971 /*@C
2972    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2973    (block compressed row).
2974 
2975    Collective on MPI_Comm
2976 
2977    Input Parameters:
2978 +  A - the matrix
2979 .  bs - the block size
2980 .  i - the indices into j for the start of each local row (starts with zero)
2981 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2982 -  v - optional values in the matrix
2983 
2984    Level: developer
2985 
2986 .keywords: matrix, aij, compressed row, sparse, parallel
2987 
2988 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2989 @*/
2990 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2991 {
2992   PetscErrorCode ierr;
2993 
2994   PetscFunctionBegin;
2995   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2996   PetscValidType(B,1);
2997   PetscValidLogicalCollectiveInt(B,bs,2);
2998   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
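/*
   Usage sketch (hypothetical data): two local 2x2 block rows with block columns
   {0,1} and {1}; jj is sorted within each row as required, and passing PETSC_NULL
   for v leaves the numerical values zero so they can be set later.
.vb
     Mat            A;
     PetscInt       bs   = 2;
     PetscInt       ii[] = {0,2,3};
     PetscInt       jj[] = {0,1,1};
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
.ve
*/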
3001 
3002 EXTERN_C_BEGIN
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3005 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3006 {
3007   Mat_MPIBAIJ    *b;
3008   PetscErrorCode ierr;
3009   PetscInt       i;
3010   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3011 
3012   PetscFunctionBegin;
3013   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3014   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3015 
3016   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3017   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3018   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3019   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3020 
3021   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3022   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3023   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3025   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3026 
3027   if (d_nnz) {
3028     for (i=0; i<B->rmap->n/bs; i++) {
3029       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3030     }
3031   }
3032   if (o_nnz) {
3033     for (i=0; i<B->rmap->n/bs; i++) {
3034       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3035     }
3036   }
3037 
3038   b = (Mat_MPIBAIJ*)B->data;
3039   b->bs2 = bs*bs;
3040   b->mbs = B->rmap->n/bs;
3041   b->nbs = B->cmap->n/bs;
3042   b->Mbs = B->rmap->N/bs;
3043   b->Nbs = B->cmap->N/bs;
3044 
3045   for (i=0; i<=b->size; i++) {
3046     b->rangebs[i] = B->rmap->range[i]/bs;
3047   }
3048   b->rstartbs = B->rmap->rstart/bs;
3049   b->rendbs   = B->rmap->rend/bs;
3050   b->cstartbs = B->cmap->rstart/bs;
3051   b->cendbs   = B->cmap->rend/bs;
3052 
3053   if (!B->preallocated) {
3054     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3055     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3056     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3057     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3058     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3059     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3060     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3061     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3062     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3063   }
3064 
3065   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3066   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3067   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3068   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3069   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3070   B->preallocated = PETSC_TRUE;
3071   PetscFunctionReturn(0);
3072 }
3073 EXTERN_C_END
3074 
3075 EXTERN_C_BEGIN
3076 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3077 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3078 EXTERN_C_END
3079 
3080 
3081 EXTERN_C_BEGIN
3082 #undef __FUNCT__
3083 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3084 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3085 {
3086   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3087   PetscErrorCode ierr;
3088   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3089   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3090   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3091 
3092   PetscFunctionBegin;
3093   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3094   ii[0] = 0;
3095   CHKMEMQ;
3096   for (i=0; i<M; i++) {
3097     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3098     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3099     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3100     /* remove one from the count if the row contains a diagonal entry */
3101     for (j=id[i]; j<id[i+1]; j++) {
3102       if (jd[j] == i) {ii[i+1]--;break;}
3103     }
3104     CHKMEMQ;
3105   }
3106   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3107   cnt = 0;
3108   for (i=0; i<M; i++) {
3109     for (j=io[i]; j<io[i+1]; j++) {
3110       if (garray[jo[j]] > rstart) break;
3111       jj[cnt++] = garray[jo[j]];
3112       CHKMEMQ;
3113     }
3114     for (k=id[i]; k<id[i+1]; k++) {
3115       if (jd[k] != i) {
3116         jj[cnt++] = rstart + jd[k];
3117         CHKMEMQ;
3118       }
3119     }
3120     for (;j<io[i+1]; j++) {
3121       jj[cnt++] = garray[jo[j]];
3122       CHKMEMQ;
3123     }
3124   }
3125   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 EXTERN_C_END
3129 
3130 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3131 EXTERN_C_BEGIN
3132 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3133 EXTERN_C_END
3134 
3135 EXTERN_C_BEGIN
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3138 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3139 {
3140   PetscErrorCode ierr;
3141   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3142   Mat            B;
3143   Mat_MPIAIJ     *b;
3144 
3145   PetscFunctionBegin;
3146   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3147 
3148   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3149   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3150   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3151   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3152   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3153   b = (Mat_MPIAIJ*) B->data;
3154 
3155   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3156   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3157   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3158   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3159   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3160   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164   if (reuse == MAT_REUSE_MATRIX) {
3165     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3166   } else {
3167    *newmat = B;
3168   }
3169   PetscFunctionReturn(0);
3170 }
3171 EXTERN_C_END
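/*
   Conversion sketch: this routine is normally reached through the public
   MatConvert() interface. MAT_INITIAL_MATRIX returns a new MPIAIJ matrix;
   MAT_REUSE_MATRIX replaces A in place via MatHeaderReplace() as above.
.vb
     Mat            Aij;
     PetscErrorCode ierr;
     ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aij);CHKERRQ(ierr);
.ve
*/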
3172 
3173 EXTERN_C_BEGIN
3174 #if defined(PETSC_HAVE_MUMPS)
3175 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3176 #endif
3177 EXTERN_C_END
3178 
3179 /*MC
3180    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3181 
3182    Options Database Keys:
3183 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3184 . -mat_block_size <bs> - set the blocksize used to store the matrix
3185 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; fact is the hash table fill factor
3186 
3187   Level: beginner
3188 
3189 .seealso: MatCreateMPIBAIJ
3190 M*/
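/*
   Options sketch (hypothetical executable name): the type and block size can be
   selected at run time when the code calls MatSetFromOptions().
.vb
     mpiexec -n 4 ./myapp -mat_type mpibaij -mat_block_size 3
.ve
*/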
3191 
3192 EXTERN_C_BEGIN
3193 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3194 EXTERN_C_END
3195 
3196 EXTERN_C_BEGIN
3197 #undef __FUNCT__
3198 #define __FUNCT__ "MatCreate_MPIBAIJ"
3199 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3200 {
3201   Mat_MPIBAIJ    *b;
3202   PetscErrorCode ierr;
3203   PetscBool      flg;
3204 
3205   PetscFunctionBegin;
3206   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3207   B->data = (void*)b;
3208 
3209   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3210   B->assembled  = PETSC_FALSE;
3211 
3212   B->insertmode = NOT_SET_VALUES;
3213   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3214   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3215 
3216   /* build local table of row and column ownerships */
3217   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3218 
3219   /* build cache for off array entries formed */
3220   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3221   b->donotstash  = PETSC_FALSE;
3222   b->colmap      = PETSC_NULL;
3223   b->garray      = PETSC_NULL;
3224   b->roworiented = PETSC_TRUE;
3225 
3226   /* stuff used in block assembly */
3227   b->barray       = 0;
3228 
3229   /* stuff used for matrix vector multiply */
3230   b->lvec         = 0;
3231   b->Mvctx        = 0;
3232 
3233   /* stuff for MatGetRow() */
3234   b->rowindices   = 0;
3235   b->rowvalues    = 0;
3236   b->getrowactive = PETSC_FALSE;
3237 
3238   /* hash table stuff */
3239   b->ht           = 0;
3240   b->hd           = 0;
3241   b->ht_size      = 0;
3242   b->ht_flag      = PETSC_FALSE;
3243   b->ht_fact      = 0;
3244   b->ht_total_ct  = 0;
3245   b->ht_insert_ct = 0;
3246 
3247   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3248   b->ijonly       = PETSC_FALSE;
3249 
3250   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3251     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3252     if (flg) {
3253       PetscReal fact = 1.39;
3254       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3255       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3256       if (fact <= 1.0) fact = 1.39;
3257       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3258       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3259     }
3260   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3261 
3262 #if defined(PETSC_HAVE_MUMPS)
3263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3264 #endif
3265   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3266                                      "MatConvert_MPIBAIJ_MPIAdj",
3267                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3268   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3269                                      "MatConvert_MPIBAIJ_MPIAIJ",
3270                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3272                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3273                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3275                                      "MatStoreValues_MPIBAIJ",
3276                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3278                                      "MatRetrieveValues_MPIBAIJ",
3279                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3281                                      "MatGetDiagonalBlock_MPIBAIJ",
3282                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3284                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3285                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3287                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3288                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3290                                      "MatDiagonalScaleLocal_MPIBAIJ",
3291                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3293                                      "MatSetHashTableFactor_MPIBAIJ",
3294                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3296                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3297                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3298   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3299   PetscFunctionReturn(0);
3300 }
3301 EXTERN_C_END
3302 
3303 /*MC
3304    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3305 
3306    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3307    and MATMPIBAIJ otherwise.
3308 
3309    Options Database Keys:
3310 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3311 
3312   Level: beginner
3313 
3314 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3315 M*/
3316 
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3319 /*@C
3320    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3321    (block compressed row).  For good matrix assembly performance
3322    the user should preallocate the matrix storage by setting the parameters
3323    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3324    performance can be increased by more than a factor of 50.
3325 
3326    Collective on Mat
3327 
3328    Input Parameters:
3329 +  A - the matrix
3330 .  bs   - size of block
3331 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3332            submatrix  (same for all local rows)
3333 .  d_nnz - array containing the number of block nonzeros in the various block rows
3334            in the diagonal portion of the local submatrix (possibly different for each block
3335            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3336            set it even if it is zero.
3337 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3338            submatrix (same for all local rows).
3339 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3340            off-diagonal portion of the local submatrix (possibly different for
3341            each block row) or PETSC_NULL.
3342 
3343    If the *_nnz parameter is given then the *_nz parameter is ignored
3344 
3345    Options Database Keys:
3346 +   -mat_block_size - size of the blocks to use
3347 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; fact is the hash table fill factor
3348 
3349    Notes:
3350    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3351    then it must be used on all processors that share the object for that argument.
3352 
3353    Storage Information:
3354    For a square global matrix we define each processor's diagonal portion
3355    to be its local rows and the corresponding columns (a square submatrix);
3356    each processor's off-diagonal portion encompasses the remainder of the
3357    local matrix (a rectangular submatrix).
3358 
3359    The user can specify preallocated storage for the diagonal part of
3360    the local submatrix with either d_nz or d_nnz (not both).  Set
3361    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3362    memory allocation.  Likewise, specify preallocated storage for the
3363    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3364 
3365    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3366    the figure below we depict these three local rows and all columns (0-11).
3367 
3368 .vb
3369            0 1 2 3 4 5 6 7 8 9 10 11
3370           -------------------
3371    row 3  |  o o o d d d o o o o o o
3372    row 4  |  o o o d d d o o o o o o
3373    row 5  |  o o o d d d o o o o o o
3374           -------------------
3375 .ve
3376 
3377    Thus, any entries in the d locations are stored in the d (diagonal)
3378    submatrix, and any entries in the o locations are stored in the
3379    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3380    stored simply in the MATSEQBAIJ format for compressed row storage.
3381 
3382    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3383    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3384    In general, for PDE problems in which most nonzeros are near the diagonal,
3385    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3386    or you will get TERRIBLE performance; see the users' manual chapter on
3387    matrices.
3388 
3389    You can call MatGetInfo() to get information on how effective the preallocation was;
3390    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3391    You can also run with the option -info and look for messages with the string
3392    malloc in them to see if additional memory allocation was needed.
3393 
3394    Level: intermediate
3395 
3396 .keywords: matrix, block, aij, compressed row, sparse, parallel
3397 
3398 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3399 @*/
3400 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3401 {
3402   PetscErrorCode ierr;
3403 
3404   PetscFunctionBegin;
3405   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3406   PetscValidType(B,1);
3407   PetscValidLogicalCollectiveInt(B,bs,2);
3408   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3409   PetscFunctionReturn(0);
3410 }
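/*
   Usage sketch following the MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   paradigm recommended below (comm, m and bs are assumptions for illustration;
   5 and 2 block nonzeros per block row match the defaults used by the
   implementation above).
.vb
     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
*/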
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatCreateBAIJ"
3414 /*@C
3415    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3416    (block compressed row).  For good matrix assembly performance
3417    the user should preallocate the matrix storage by setting the parameters
3418    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3419    performance can be increased by more than a factor of 50.
3420 
3421    Collective on MPI_Comm
3422 
3423    Input Parameters:
3424 +  comm - MPI communicator
3425 .  bs   - size of block
3426 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3427            This value should be the same as the local size used in creating the
3428            y vector for the matrix-vector product y = Ax.
3429 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3430            This value should be the same as the local size used in creating the
3431            x vector for the matrix-vector product y = Ax.
3432 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3433 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3434 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3435            submatrix  (same for all local rows)
3436 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3437            in the diagonal portion of the local submatrix (possibly different for each block
3438            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3439            and set it even if it is zero.
3440 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3441            submatrix (same for all local rows).
3442 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3443            off-diagonal portion of the local submatrix (possibly different for
3444            each block row) or PETSC_NULL.
3445 
3446    Output Parameter:
3447 .  A - the matrix
3448 
3449    Options Database Keys:
3450 +   -mat_block_size - size of the blocks to use
3451 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; fact is the hash table fill factor
3452 
3453    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3454    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3455    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3456 
3457    Notes:
3458    If the *_nnz parameter is given then the *_nz parameter is ignored
3459 
3460    A nonzero block is any block that has 1 or more nonzeros in it
3461 
3462    The user MUST specify either the local or global matrix dimensions
3463    (possibly both).
3464 
3465    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3466    then it must be used on all processors that share the object for that argument.
3467 
3468    Storage Information:
3469    For a square global matrix we define each processor's diagonal portion
3470    to be its local rows and the corresponding columns (a square submatrix);
3471    each processor's off-diagonal portion encompasses the remainder of the
3472    local matrix (a rectangular submatrix).
3473 
3474    The user can specify preallocated storage for the diagonal part of
3475    the local submatrix with either d_nz or d_nnz (not both).  Set
3476    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3477    memory allocation.  Likewise, specify preallocated storage for the
3478    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3479 
3480    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3481    the figure below we depict these three local rows and all columns (0-11).
3482 
3483 .vb
3484            0 1 2 3 4 5 6 7 8 9 10 11
3485           -------------------
3486    row 3  |  o o o d d d o o o o o o
3487    row 4  |  o o o d d d o o o o o o
3488    row 5  |  o o o d d d o o o o o o
3489           -------------------
3490 .ve
3491 
3492    Thus, any entries in the d locations are stored in the d (diagonal)
3493    submatrix, and any entries in the o locations are stored in the
3494    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3495    stored simply in the MATSEQBAIJ format for compressed row storage.
3496 
3497    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3498    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3499    In general, for PDE problems in which most nonzeros are near the diagonal,
3500    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3501    or you will get TERRIBLE performance; see the users' manual chapter on
3502    matrices.
3503 
3504    Level: intermediate
3505 
3506 .keywords: matrix, block, aij, compressed row, sparse, parallel
3507 
3508 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3509 @*/
3510 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3511 {
3512   PetscErrorCode ierr;
3513   PetscMPIInt    size;
3514 
3515   PetscFunctionBegin;
3516   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3517   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3518   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3519   if (size > 1) {
3520     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3521     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3522   } else {
3523     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3524     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3525   }
3526   PetscFunctionReturn(0);
3527 }
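/*
   Usage sketch (hypothetical sizes): a 300x300 matrix of 3x3 blocks; one block is
   inserted with MatSetValuesBlocked() using block (not scalar) indices.
.vb
     Mat            A;
     PetscInt       row = 0,col = 0;
     PetscScalar    v[9];
     PetscErrorCode ierr;
     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,300,300,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = PetscMemzero(v,9*sizeof(PetscScalar));CHKERRQ(ierr);
     ierr = MatSetValuesBlocked(A,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
*/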
3528 
3529 #undef __FUNCT__
3530 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3531 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3532 {
3533   Mat            mat;
3534   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3535   PetscErrorCode ierr;
3536   PetscInt       len=0;
3537 
3538   PetscFunctionBegin;
3539   *newmat       = 0;
3540   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3541   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3542   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3543   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3544 
3545   mat->factortype   = matin->factortype;
3546   mat->preallocated = PETSC_TRUE;
3547   mat->assembled    = PETSC_TRUE;
3548   mat->insertmode   = NOT_SET_VALUES;
3549 
3550   a      = (Mat_MPIBAIJ*)mat->data;
3551   mat->rmap->bs  = matin->rmap->bs;
3552   a->bs2   = oldmat->bs2;
3553   a->mbs   = oldmat->mbs;
3554   a->nbs   = oldmat->nbs;
3555   a->Mbs   = oldmat->Mbs;
3556   a->Nbs   = oldmat->Nbs;
3557 
3558   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3559   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3560 
3561   a->size         = oldmat->size;
3562   a->rank         = oldmat->rank;
3563   a->donotstash   = oldmat->donotstash;
3564   a->roworiented  = oldmat->roworiented;
3565   a->rowindices   = 0;
3566   a->rowvalues    = 0;
3567   a->getrowactive = PETSC_FALSE;
3568   a->barray       = 0;
3569   a->rstartbs     = oldmat->rstartbs;
3570   a->rendbs       = oldmat->rendbs;
3571   a->cstartbs     = oldmat->cstartbs;
3572   a->cendbs       = oldmat->cendbs;
3573 
3574   /* hash table stuff */
3575   a->ht           = 0;
3576   a->hd           = 0;
3577   a->ht_size      = 0;
3578   a->ht_flag      = oldmat->ht_flag;
3579   a->ht_fact      = oldmat->ht_fact;
3580   a->ht_total_ct  = 0;
3581   a->ht_insert_ct = 0;
3582 
3583   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3584   if (oldmat->colmap) {
3585 #if defined (PETSC_USE_CTABLE)
3586   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3587 #else
3588   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3589   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3590   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3591 #endif
3592   } else a->colmap = 0;
3593 
3594   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3595     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3596     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3597     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3598   } else a->garray = 0;
3599 
3600   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3601   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3602   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3603   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3604   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3605 
3606   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3607   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3608   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3609   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3610   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3611   *newmat = mat;
3612 
3613   PetscFunctionReturn(0);
3614 }
3615 
3616 #undef __FUNCT__
3617 #define __FUNCT__ "MatLoad_MPIBAIJ"
3618 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3619 {
3620   PetscErrorCode ierr;
3621   int            fd;
3622   PetscInt       i,nz,j,rstart,rend;
3623   PetscScalar    *vals,*buf;
3624   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3625   MPI_Status     status;
3626   PetscMPIInt    rank,size,maxnz;
3627   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3628   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3629   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3630   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3631   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3632   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3633 
3634   PetscFunctionBegin;
3635   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3636     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3637   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3638 
3639   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3640   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3641   if (!rank) {
3642     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3643     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3644     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3645   }
3646 
3647   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3648 
3649   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3650   M = header[1]; N = header[2];
3651 
3652   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3653   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3654   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3655 
3656   /* If global sizes are set, check if they are consistent with that given in the file */
3657   if (sizesset) {
3658     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3659   }
3660   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3661   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3662 
3663   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3664 
3665   /*
3666      This code adds extra rows to make sure the number of rows is
3667      divisible by the blocksize
3668   */
3669   Mbs        = M/bs;
3670   extra_rows = bs - M + bs*Mbs;
3671   if (extra_rows == bs) extra_rows = 0;
3672   else                  Mbs++;
3673   if (extra_rows && !rank) {
3674     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3675   }
3676 
3677   /* determine ownership of all rows */
3678   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3679     mbs        = Mbs/size + ((Mbs % size) > rank);
3680     m          = mbs*bs;
3681   } else { /* User set */
3682     m          = newmat->rmap->n;
3683     mbs        = m/bs;
3684   }
3685   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3686   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3687 
3688   /* process 0 needs enough room for process with most rows */
3689   if (!rank) {
3690     mmax = rowners[1];
3691     for (i=2; i<=size; i++) {
3692       mmax = PetscMax(mmax,rowners[i]);
3693     }
3694     mmax*=bs;
3695   } else mmax = m;
3696 
3697   rowners[0] = 0;
3698   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3699   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3700   rstart = rowners[rank];
3701   rend   = rowners[rank+1];
3702 
3703   /* distribute row lengths to all processors */
3704   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3705   if (!rank) {
3706     mend = m;
3707     if (size == 1) mend = mend - extra_rows;
3708     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3709     for (j=mend; j<m; j++) locrowlens[j] = 1;
3710     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3711     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3712     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3713     for (j=0; j<m; j++) {
3714       procsnz[0] += locrowlens[j];
3715     }
3716     for (i=1; i<size; i++) {
3717       mend = browners[i+1] - browners[i];
3718       if (i == size-1) mend = mend - extra_rows;
3719       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3720       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3721       /* calculate the number of nonzeros on each processor */
3722       for (j=0; j<browners[i+1]-browners[i]; j++) {
3723         procsnz[i] += rowlengths[j];
3724       }
3725       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3726     }
3727     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3728   } else {
3729     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3730   }
3731 
3732   if (!rank) {
3733     /* determine max buffer needed and allocate it */
3734     maxnz = procsnz[0];
3735     for (i=1; i<size; i++) {
3736       maxnz = PetscMax(maxnz,procsnz[i]);
3737     }
3738     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3739 
3740     /* read in my part of the matrix column indices  */
3741     nz     = procsnz[0];
3742     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3743     mycols = ibuf;
3744     if (size == 1)  nz -= extra_rows;
3745     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3746     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3747 
3748     /* read in everyone else's rows (except the last proc) and ship them off */
3749     for (i=1; i<size-1; i++) {
3750       nz   = procsnz[i];
3751       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3752       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3753     }
3754     /* read in the data for the last proc */
3755     if (size != 1) {
3756       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3757       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3758       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3759       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3760     }
3761     ierr = PetscFree(cols);CHKERRQ(ierr);
3762   } else {
3763     /* determine buffer space needed for message */
3764     nz = 0;
3765     for (i=0; i<m; i++) {
3766       nz += locrowlens[i];
3767     }
3768     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3769     mycols = ibuf;
3770     /* receive message of column indices */
3771     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3772     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3773     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3774   }
3775 
3776   /* loop over local rows, determining number of off diagonal entries */
3777   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3778   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3779   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3780   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3781   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3782   rowcount = 0; nzcount = 0;
3783   for (i=0; i<mbs; i++) {
3784     dcount  = 0;
3785     odcount = 0;
3786     for (j=0; j<bs; j++) {
3787       kmax = locrowlens[rowcount];
3788       for (k=0; k<kmax; k++) {
3789         tmp = mycols[nzcount++]/bs;
3790         if (!mask[tmp]) {
3791           mask[tmp] = 1;
3792           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3793           else masked1[dcount++] = tmp;
3794         }
3795       }
3796       rowcount++;
3797     }
3798 
3799     dlens[i]  = dcount;
3800     odlens[i] = odcount;
3801 
3802     /* zero out the mask elements we set */
3803     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3804     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3805   }
3806 
3807 
3808   if (!sizesset) {
3809     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3810   }
3811   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3812 
3813   if (!rank) {
3814     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3815     /* read in my part of the matrix numerical values  */
3816     nz = procsnz[0];
3817     vals = buf;
3818     mycols = ibuf;
3819     if (size == 1)  nz -= extra_rows;
3820     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3821     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3822 
3823     /* insert into matrix */
3824     jj      = rstart*bs;
3825     for (i=0; i<m; i++) {
3826       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3827       mycols += locrowlens[i];
3828       vals   += locrowlens[i];
3829       jj++;
3830     }
3831     /* read in other processors (except the last one) and ship out */
3832     for (i=1; i<size-1; i++) {
3833       nz   = procsnz[i];
3834       vals = buf;
3835       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3836       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3837     }
3838     /* the last proc */
3839     if (size != 1) {
3840       nz   = procsnz[i] - extra_rows;
3841       vals = buf;
3842       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3843       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3844       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3845     }
3846     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3847   } else {
3848     /* receive numeric values */
3849     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3850 
3851     /* receive message of values */
3852     vals   = buf;
3853     mycols = ibuf;
3854     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3855 
3856     /* insert into matrix */
3857     jj      = rstart*bs;
3858     for (i=0; i<m; i++) {
3859       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3860       mycols += locrowlens[i];
3861       vals   += locrowlens[i];
3862       jj++;
3863     }
3864   }
3865   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3866   ierr = PetscFree(buf);CHKERRQ(ierr);
3867   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3868   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3869   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3870   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3871   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3873 
3874   PetscFunctionReturn(0);
3875 }
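/*
   Usage sketch (hypothetical file name): MatLoad() dispatches to the routine
   above for an MPIBAIJ matrix; the block size can be chosen at run time with
   -matload_block_size <bs>.
.vb
     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
.ve
*/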
3876 
3877 #undef __FUNCT__
3878 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3879 /*@
3880    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3881 
3882    Input Parameters:
3883 +  mat  - the matrix
3884 -  fact - factor
3885 
3886    Not Collective, each process can use a different factor
3887 
3888    Level: advanced
3889 
3890   Notes:
3891    This can also be set by the command line option: -mat_use_hash_table <fact>
3892 
3893 .keywords: matrix, hashtable, factor, HT
3894 
3895 .seealso: MatSetOption()
3896 @*/
3897 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3898 {
3899   PetscErrorCode ierr;
3900 
3901   PetscFunctionBegin;
3902   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3903   PetscFunctionReturn(0);
3904 }
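/*
   Usage sketch: enable the hash-table assembly path and set its fill factor.
   Note that the options handling in MatCreate_MPIBAIJ() above resets factors
   <= 1.0 to the default 1.39 when the option is given on the command line.
.vb
     PetscErrorCode ierr;
     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve
*/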
3905 
3906 EXTERN_C_BEGIN
3907 #undef __FUNCT__
3908 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3909 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3910 {
3911   Mat_MPIBAIJ *baij;
3912 
3913   PetscFunctionBegin;
3914   baij = (Mat_MPIBAIJ*)mat->data;
3915   baij->ht_fact = fact;
3916   PetscFunctionReturn(0);
3917 }
3918 EXTERN_C_END
3919 
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3922 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3923 {
3924   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3925   PetscFunctionBegin;
3926   *Ad     = a->A;
3927   *Ao     = a->B;
3928   *colmap = a->garray;
3929   PetscFunctionReturn(0);
3930 }
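/*
   Usage sketch: Ad and Ao are the internal diagonal and off-diagonal SEQBAIJ
   parts (borrowed references), and colmap is the garray used throughout this
   file: for a local block column j of Ao, colmap[j] is the corresponding global
   block column, exactly as in MatConvert_MPIBAIJ_MPIAdj() above.
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscErrorCode ierr;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve
*/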
3931 
3932 /*
3933     Special version for direct calls from Fortran (to eliminate two function-call overheads)
3934 */
3935 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3936 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3937 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3938 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3939 #endif
3940 
3941 #undef __FUNCT__
3942 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3943 /*@C
3944   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3945 
3946   Collective on Mat
3947 
3948   Input Parameters:
3949 + mat - the matrix
3950 . min - number of input rows
3951 . im - input rows
3952 . nin - number of input columns
3953 . in - input columns
3954 . v - numerical values input
3955 - addvin - INSERT_VALUES or ADD_VALUES
3956 
3957   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the duplication is unfortunate, but it eliminates two levels of function-call overhead for direct Fortran calls.
3958 
3959   Level: advanced
3960 
3961 .seealso:   MatSetValuesBlocked()
3962 @*/
3963 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3964 {
3965   /* convert input arguments to C version */
3966   Mat             mat = *matin;
3967   PetscInt        m = *min, n = *nin;
3968   InsertMode      addv = *addvin;
3969 
3970   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3971   const MatScalar *value;
3972   MatScalar       *barray=baij->barray;
3973   PetscBool       roworiented = baij->roworiented;
3974   PetscErrorCode  ierr;
3975   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3976   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3977   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3978 
3979   PetscFunctionBegin;
3980   /* tasks normally handled by MatSetValuesBlocked() */
3981   if (mat->insertmode == NOT_SET_VALUES) {
3982     mat->insertmode = addv;
3983   }
3984 #if defined(PETSC_USE_DEBUG)
3985   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3986   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3987 #endif
3988   if (mat->assembled) {
3989     mat->was_assembled = PETSC_TRUE;
3990     mat->assembled     = PETSC_FALSE;
3991   }
3992   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3993 
3994 
3995   if (!barray) {
3996     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3997     baij->barray = barray;
3998   }
3999 
4000   if (roworiented) {
4001     stepval = (n-1)*bs;
4002   } else {
4003     stepval = (m-1)*bs;
4004   }
4005   for (i=0; i<m; i++) {
4006     if (im[i] < 0) continue;
4007 #if defined(PETSC_USE_DEBUG)
4008     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4009 #endif
4010     if (im[i] >= rstart && im[i] < rend) {
4011       row = im[i] - rstart;
4012       for (j=0; j<n; j++) {
4013         /* If n == 1 (row oriented) or m == 1 (column oriented), the block is already contiguous in v and no copy is required */
4014         if ((roworiented) && (n == 1)) {
4015           barray = (MatScalar*)v + i*bs2;
4016         } else if ((!roworiented) && (m == 1)) {
4017           barray = (MatScalar*)v + j*bs2;
4018         } else { /* Here a copy is required */
4019           if (roworiented) {
4020             value = v + i*(stepval+bs)*bs + j*bs;
4021           } else {
4022             value = v + j*(stepval+bs)*bs + i*bs;
4023           }
4024           for (ii=0; ii<bs; ii++,value+=stepval) {
4025             for (jj=0; jj<bs; jj++) {
4026               *barray++  = *value++;
4027             }
4028           }
4029           barray -=bs2;
4030         }
4031 
4032         if (in[j] >= cstart && in[j] < cend) {
4033           col  = in[j] - cstart;
4034           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4035         }
4036         else if (in[j] < 0) continue;
4037 #if defined(PETSC_USE_DEBUG)
4038         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4039 #endif
4040         else {
4041           if (mat->was_assembled) {
4042             if (!baij->colmap) {
4043               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4044             }
4045 
4046 #if defined(PETSC_USE_DEBUG)
4047 #if defined (PETSC_USE_CTABLE)
4048             { PetscInt data;
4049               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4050               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4051             }
4052 #else
4053             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4054 #endif
4055 #endif
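            /*
               colmap maps a global block column to 1 + bs*(local block column
               of B); a value of 0 means the column is not yet present, hence
               the shift by 1 and the division by bs in the lookups below.
            */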
4056 #if defined (PETSC_USE_CTABLE)
4057             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4058             col  = (col - 1)/bs;
4059 #else
4060             col = (baij->colmap[in[j]] - 1)/bs;
4061 #endif
4062             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4063               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4064               col =  in[j];
4065             }
4066           }
4067           else col = in[j];
4068           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4069         }
4070       }
4071     } else {
4072       if (!baij->donotstash) {
4073         if (roworiented) {
4074           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4075         } else {
4076           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4077         }
4078       }
4079     }
4080   }
4081 
4082   /* task normally handled by MatSetValuesBlocked() */
4083   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4084   PetscFunctionReturn(0);
4085 }
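
/*
   Indexing sketch (illustrative only; ExampleExtractBlockRowOriented is not
   called anywhere): how the copy loop above walks a row-oriented input array.
   v is laid out as m block rows of n bs-by-bs blocks whose scalar rows are
   interleaved, so block (i,j) starts at v + i*(stepval+bs)*bs + j*bs with
   stepval = (n-1)*bs, and each scalar row of the block is bs contiguous
   entries followed by a jump of stepval.
*/
static void ExampleExtractBlockRowOriented(PetscInt n,PetscInt bs,const MatScalar *v,PetscInt i,PetscInt j,MatScalar *block)
{
  PetscInt        ii,jj,stepval = (n-1)*bs;
  const MatScalar *value = v + i*(stepval+bs)*bs + j*bs;

  for (ii=0; ii<bs; ii++,value+=stepval) {
    for (jj=0; jj<bs; jj++) *block++ = *value++;
  }
}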
4086 
4087 #undef __FUNCT__
4088 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4089 /*@
4090      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4091          in standard CSR format.
4092 
4093    Collective on MPI_Comm
4094 
4095    Input Parameters:
4096 +  comm - MPI communicator
4097 .  bs - the block size, only a block size of 1 is supported
4098 .  m - number of local rows (Cannot be PETSC_DECIDE)
4099 .  n - This value should be the same as the local size used in creating the
4100        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4101        it calculated if N is given). For square matrices n is almost always m.
4102 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4103 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4104 .   i - row indices
4105 .   j - column indices
4106 -   a - matrix values
4107 
4108    Output Parameter:
4109 .   mat - the matrix
4110 
4111    Level: intermediate
4112 
4113    Notes:
4114        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4115      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4116      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4117 
4118        The i and j indices are 0 based; the entries of i are the usual CSR row offsets into the local j array.
4119 
4120 .keywords: matrix, baij, compressed row, sparse, parallel
4121 
4122 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4123           MPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
4124 @*/
4125 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4126 {
4127   PetscErrorCode ierr;
4128 
4129 
4130   PetscFunctionBegin;
4131   if (i[0]) {
4132     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4133   }
4134   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4135   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4136   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4137   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4138   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4139   PetscFunctionReturn(0);
4140 }
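
/*
   Usage sketch (hypothetical; bs must be 1 as noted above): each process hands
   in its local rows in CSR form.  Here every rank owns two rows of a global
   identity matrix; i[] holds row offsets starting at 0 and j[] holds global
   column indices.
*/
static PetscErrorCode ExampleCreateIdentityFromCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i[3],j[2];
  PetscScalar    a[2] = {1.0,1.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  i[0] = 0; i[1] = 1; i[2] = 2;          /* one nonzero in each of the two local rows */
  j[0] = 2*rank; j[1] = 2*rank+1;        /* diagonal entries, global numbering */
  ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}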
4141