xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a6dfd86ebdf75fa024070af26d51b62fd16650a3)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the number of block columns of the global matrix.
82 */
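/*
   A small worked example of the (non-CTABLE) colmap layout, for illustration only:
   colmap[] stores, for each global block column present in B, 1 + the local starting
   point column of that block, and 0 for absent blocks.  With bs = 2 and garray = {3,7}
   the loop below sets colmap[3] = 1 and colmap[7] = 3, so a global point column
   gc = 15 maps to local column (colmap[gc/bs] - 1) + gc%bs = 2 + 1 = 3 in B, which is
   exactly how MatSetValues_MPIBAIJ() uses it.
*/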
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++) {
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
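/*
   The two macros below are, in effect, hand-inlined single-entry versions of
   MatSetValues_SeqBAIJ() for the diagonal (A) and off-diagonal (B) parts: they locate
   block column col/bs within block row row/bs by a short bisection followed by a linear
   scan of the sorted block-column list, then write the scalar at offset bs*cidx + ridx
   inside the bs-by-bs block (blocks are stored column-major).  If the block is not
   present and new nonzeros are allowed, MatSeqXAIJReallocateAIJ() makes room and the
   later blocks in the row are shifted up.
*/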
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros defined above */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B = baij->B;
249               b = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba=b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray=baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) {
296     stepval = (n-1)*bs;
297   } else {
298     stepval = (m-1)*bs;
299   }
300   for (i=0; i<m; i++) {
301     if (im[i] < 0) continue;
302 #if defined(PETSC_USE_DEBUG)
303     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
304 #endif
305     if (im[i] >= rstart && im[i] < rend) {
306       row = im[i] - rstart;
307       for (j=0; j<n; j++) {
308         /* If there is only one block column (row-oriented) or one block row (column-oriented) then the input block is already contiguous and a copy is not required */
309         if ((roworiented) && (n == 1)) {
310           barray = (MatScalar*)v + i*bs2;
311         } else if ((!roworiented) && (m == 1)) {
312           barray = (MatScalar*)v + j*bs2;
313         } else { /* Here a copy is required */
314           if (roworiented) {
315             value = v + (i*(stepval+bs) + j)*bs;
316           } else {
317             value = v + (j*(stepval+bs) + i)*bs;
318           }
319           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
320             for (jj=0; jj<bs; jj++) {
321               barray[jj]  = value[jj];
322             }
323             barray += bs;
324           }
325           barray -= bs2;
326         }
327 
328         if (in[j] >= cstart && in[j] < cend) {
329           col  = in[j] - cstart;
330           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
331         }
332         else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined (PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined (PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
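/*
   HASH() is multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part of the
   golden ratio, and the macro evaluates to floor(size*frac(key*HASH_KEY)), an index in
   [0,size).  For example, HASH(11,5,tmp) gives 5*0.6180339887 = 3.0902, fractional part
   0.0902, 11*0.0902 = 0.99, hence slot 0.  Collisions are handled by the linear probing
   in MatSetValues_MPIBAIJ_HT() below and in MatCreateHashTable_MPIBAIJ_Private().
*/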
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up into the Hash Table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
427             if (idx == h1) {
428               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
429             }
430           }
431         }
432 #else
433         if (HT[idx] != key) {
434           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
435           if (idx == size) {
436             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
437             if (idx == h1) {
438               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
439             }
440           }
441         }
442 #endif
443         /* A HASH table entry is found, so insert the values at the correct address */
444         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
445         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
446       }
447     } else {
448       if (!baij->donotstash) {
449         if (roworiented) {
450           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
451         } else {
452           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
453         }
454       }
455     }
456   }
457 #if defined(PETSC_USE_DEBUG)
458   baij->ht_total_ct = total_ct;
459   baij->ht_insert_ct = insert_ct;
460 #endif
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
466 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
467 {
468   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
469   PetscBool         roworiented = baij->roworiented;
470   PetscErrorCode    ierr;
471   PetscInt          i,j,ii,jj,row,col;
472   PetscInt          rstart=baij->rstartbs;
473   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
474   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
475   PetscReal         tmp;
476   MatScalar         **HD = baij->hd,*baij_a;
477   const PetscScalar *v_t,*value;
478 #if defined(PETSC_USE_DEBUG)
479   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
480 #endif
481 
482   PetscFunctionBegin;
483 
484   if (roworiented) {
485     stepval = (n-1)*bs;
486   } else {
487     stepval = (m-1)*bs;
488   }
489   for (i=0; i<m; i++) {
490 #if defined(PETSC_USE_DEBUG)
491     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
492     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
493 #endif
494     row   = im[i];
495     v_t   = v + i*nbs2;
496     if (row >= rstart && row < rend) {
497       for (j=0; j<n; j++) {
498         col = in[j];
499 
500         /* Look up into the Hash Table */
501         key = row*Nbs+col+1;
502         h1  = HASH(size,key,tmp);
503 
504         idx = h1;
505 #if defined(PETSC_USE_DEBUG)
506         total_ct++;
507         insert_ct++;
508         if (HT[idx] != key) {
509           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
510           if (idx == size) {
511             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
512             if (idx == h1) {
513               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
514             }
515           }
516         }
517 #else
518         if (HT[idx] != key) {
519           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
520           if (idx == size) {
521             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
522             if (idx == h1) {
523               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
524             }
525           }
526         }
527 #endif
528         baij_a = HD[idx];
529         if (roworiented) {
530           /*value = v + i*(stepval+bs)*bs + j*bs;*/
531           /* value = v + (i*(stepval+bs)+j)*bs; */
532           value = v_t;
533           v_t  += bs;
534           if (addv == ADD_VALUES) {
535             for (ii=0; ii<bs; ii++,value+=stepval) {
536               for (jj=ii; jj<bs2; jj+=bs) {
537                 baij_a[jj]  += *value++;
538               }
539             }
540           } else {
541             for (ii=0; ii<bs; ii++,value+=stepval) {
542               for (jj=ii; jj<bs2; jj+=bs) {
543                 baij_a[jj]  = *value++;
544               }
545             }
546           }
547         } else {
548           value = v + j*(stepval+bs)*bs + i*bs;
549           if (addv == ADD_VALUES) {
550             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
551               for (jj=0; jj<bs; jj++) {
552                 baij_a[jj]  += *value++;
553               }
554             }
555           } else {
556             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
557               for (jj=0; jj<bs; jj++) {
558                 baij_a[jj]  = *value++;
559               }
560             }
561           }
562         }
563       }
564     } else {
565       if (!baij->donotstash) {
566         if (roworiented) {
567           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
568         } else {
569           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
570         }
571       }
572     }
573   }
574 #if defined(PETSC_USE_DEBUG)
575   baij->ht_total_ct = total_ct;
576   baij->ht_insert_ct = insert_ct;
577 #endif
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatGetValues_MPIBAIJ"
583 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
584 {
585   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
586   PetscErrorCode ierr;
587   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
588   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
589 
590   PetscFunctionBegin;
591   for (i=0; i<m; i++) {
592     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
593     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
594     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
595       row = idxm[i] - bsrstart;
596       for (j=0; j<n; j++) {
597         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
598         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
599         if (idxn[j] >= bscstart && idxn[j] < bscend) {
600           col = idxn[j] - bscstart;
601           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
602         } else {
603           if (!baij->colmap) {
604             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
605           }
606 #if defined (PETSC_USE_CTABLE)
607           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
608           data --;
609 #else
610           data = baij->colmap[idxn[j]/bs]-1;
611 #endif
612           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
613           else {
614             col  = data + idxn[j]%bs;
615             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatNorm_MPIBAIJ"
628 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
629 {
630   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
631   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
632   PetscErrorCode ierr;
633   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
634   PetscReal      sum = 0.0;
635   MatScalar      *v;
636 
637   PetscFunctionBegin;
638   if (baij->size == 1) {
639     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
640   } else {
641     if (type == NORM_FROBENIUS) {
642       v = amat->a;
643       nz = amat->nz*bs2;
644       for (i=0; i<nz; i++) {
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
651       }
652       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
653       *nrm = PetscSqrtReal(*nrm);
654     } else if (type == NORM_1) { /* max column sum */
655       PetscReal *tmp,*tmp2;
656       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
657       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
658       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
659       v = amat->a; jj = amat->j;
660       for (i=0; i<amat->nz; i++) {
661         for (j=0; j<bs; j++) {
662           col = bs*(cstart + *jj) + j; /* column index */
663           for (row=0; row<bs; row++) {
664             tmp[col] += PetscAbsScalar(*v);  v++;
665           }
666         }
667         jj++;
668       }
669       v = bmat->a; jj = bmat->j;
670       for (i=0; i<bmat->nz; i++) {
671         for (j=0; j<bs; j++) {
672           col = bs*garray[*jj] + j;
673           for (row=0; row<bs; row++) {
674             tmp[col] += PetscAbsScalar(*v); v++;
675           }
676         }
677         jj++;
678       }
679       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
680       *nrm = 0.0;
681       for (j=0; j<mat->cmap->N; j++) {
682         if (tmp2[j] > *nrm) *nrm = tmp2[j];
683       }
684       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
685     } else if (type == NORM_INFINITY) { /* max row sum */
686       PetscReal *sums;
687       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
688       sum = 0.0;
689       for (j=0; j<amat->mbs; j++) {
690         for (row=0; row<bs; row++) sums[row] = 0.0;
691         v = amat->a + bs2*amat->i[j];
692         nz = amat->i[j+1]-amat->i[j];
693         for (i=0; i<nz; i++) {
694           for (col=0; col<bs; col++) {
695             for (row=0; row<bs; row++) {
696               sums[row] += PetscAbsScalar(*v); v++;
697             }
698           }
699         }
700         v = bmat->a + bs2*bmat->i[j];
701         nz = bmat->i[j+1]-bmat->i[j];
702         for (i=0; i<nz; i++) {
703           for (col=0; col<bs; col++) {
704             for (row=0; row<bs; row++) {
705               sums[row] += PetscAbsScalar(*v); v++;
706             }
707           }
708         }
709         for (row=0; row<bs; row++) {
710           if (sums[row] > sum) sum = sums[row];
711         }
712       }
713       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
714       ierr = PetscFree(sums);CHKERRQ(ierr);
715     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 /*
721   Creates and fills in the hash table.
722   This table is created only once.
723   If new entries need to be added to the matrix
724   then the hash table has to be destroyed and
725   recreated.
726 */
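/*
   In outline: each stored bs-by-bs block of A and B is given the key row*Nbs + col + 1
   (1-based so that 0 marks an empty slot), with row and col global block indices; the
   key is hashed with HASH() and stored in HT[] using linear probing, while HD[] in the
   same slot points at the block's values inside a->a or b->a.  MatSetValues_MPIBAIJ_HT()
   and MatSetValuesBlocked_MPIBAIJ_HT() repeat the probe to find the address of an
   existing block; they cannot create new blocks, hence the note above about recreating
   the table.
*/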
727 #undef __FUNCT__
728 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
729 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
730 {
731   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
732   Mat            A = baij->A,B=baij->B;
733   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
734   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
735   PetscErrorCode ierr;
736   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
737   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
738   PetscInt       *HT,key;
739   MatScalar      **HD;
740   PetscReal      tmp;
741 #if defined(PETSC_USE_INFO)
742   PetscInt       ct=0,max=0;
743 #endif
744 
745   PetscFunctionBegin;
746   if (baij->ht) PetscFunctionReturn(0);
747 
748   baij->ht_size = (PetscInt)(factor*nz);
749   ht_size       = baij->ht_size;
750 
751   /* Allocate Memory for Hash Table */
752   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
753   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
754   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
755   HD   = baij->hd;
756   HT   = baij->ht;
757 
758   /* Loop Over A */
759   for (i=0; i<a->mbs; i++) {
760     for (j=ai[i]; j<ai[i+1]; j++) {
761       row = i+rstart;
762       col = aj[j]+cstart;
763 
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = a->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782   /* Loop Over B */
783   for (i=0; i<b->mbs; i++) {
784     for (j=bi[i]; j<bi[i+1]; j++) {
785       row = i+rstart;
786       col = garray[bj[j]];
787       key = row*Nbs + col + 1;
788       h1  = HASH(ht_size,key,tmp);
789       for (k=0; k<ht_size; k++) {
790         if (!HT[(h1+k)%ht_size]) {
791           HT[(h1+k)%ht_size] = key;
792           HD[(h1+k)%ht_size] = b->a + j*bs2;
793           break;
794 #if defined(PETSC_USE_INFO)
795         } else {
796           ct++;
797 #endif
798         }
799       }
800 #if defined(PETSC_USE_INFO)
801       if (k> max) max = k;
802 #endif
803     }
804   }
805 
806   /* Print Summary */
807 #if defined(PETSC_USE_INFO)
808   for (i=0,j=0; i<ht_size; i++) {
809     if (HT[i]) {j++;}
810   }
811   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
812 #endif
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
818 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
819 {
820   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
821   PetscErrorCode ierr;
822   PetscInt       nstash,reallocs;
823   InsertMode     addv;
824 
825   PetscFunctionBegin;
826   if (baij->donotstash || mat->nooffprocentries) {
827     PetscFunctionReturn(0);
828   }
829 
830   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
831   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
832   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
833   mat->insertmode = addv; /* in case this processor had no cache */
834 
835   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
836   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
837   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
838   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
839   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
840   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 #undef __FUNCT__
845 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
846 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
849   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
850   PetscErrorCode ierr;
851   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
852   PetscInt       *row,*col;
853   PetscBool      r1,r2,r3,other_disassembled;
854   MatScalar      *val;
855   InsertMode     addv = mat->insertmode;
856   PetscMPIInt    n;
857 
858   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
859   PetscFunctionBegin;
860   if (!baij->donotstash && !mat->nooffprocentries) {
861     while (1) {
862       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
863       if (!flg) break;
864 
865       for (i=0; i<n;) {
866         /* Now identify the consecutive vals belonging to the same row */
867         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
868         if (j < n) ncols = j-i;
869         else       ncols = n-i;
870         /* Now assemble all these values with a single function call */
871         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
872         i = j;
873       }
874     }
875     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
876     /* Now process the block-stash. Since the values are stashed column-oriented,
877        temporarily set the roworiented flags to PETSC_FALSE, and restore the original
878        flags after the calls to MatSetValuesBlocked_MPIBAIJ() */
879     r1 = baij->roworiented;
880     r2 = a->roworiented;
881     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
882     baij->roworiented = PETSC_FALSE;
883     a->roworiented    = PETSC_FALSE;
884     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
885     while (1) {
886       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
887       if (!flg) break;
888 
889       for (i=0; i<n;) {
890         /* Now identify the consecutive vals belonging to the same row */
891         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
892         if (j < n) ncols = j-i;
893         else       ncols = n-i;
894         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
895         i = j;
896       }
897     }
898     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
899     baij->roworiented = r1;
900     a->roworiented    = r2;
901     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
902   }
903 
904   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
905   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
906 
907   /* determine if any processor has disassembled, if so we must
908      also disassemble ourselves, in order that we may reassemble. */
909   /*
910      if the nonzero structure of submatrix B cannot change then we know that
911      no processor disassembled, and thus we can skip this step
912   */
913   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
914     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
915     if (mat->was_assembled && !other_disassembled) {
916       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
917     }
918   }
919 
920   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
921     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
922   }
923   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
924   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
925   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
926 
927 #if defined(PETSC_USE_INFO)
928   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
929     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
930     baij->ht_total_ct  = 0;
931     baij->ht_insert_ct = 0;
932   }
933 #endif
934   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
935     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
936     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
937     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
938   }
939 
940   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
941   baij->rowvalues = 0;
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
947 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
948 {
949   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
950   PetscErrorCode    ierr;
951   PetscMPIInt       size = baij->size,rank = baij->rank;
952   PetscInt          bs = mat->rmap->bs;
953   PetscBool         iascii,isdraw;
954   PetscViewer       sviewer;
955   PetscViewerFormat format;
956 
957   PetscFunctionBegin;
958   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
959   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
960   if (iascii) {
961     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
962     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
963       MatInfo info;
964       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
965       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
966       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
968              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
969       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
971       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
973       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
975       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
976       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
977       PetscFunctionReturn(0);
978     } else if (format == PETSC_VIEWER_ASCII_INFO) {
979       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
980       PetscFunctionReturn(0);
981     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
982       PetscFunctionReturn(0);
983     }
984   }
985 
986   if (isdraw) {
987     PetscDraw       draw;
988     PetscBool  isnull;
989     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
990     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
991   }
992 
993   if (size == 1) {
994     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
995     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
996   } else {
997     /* assemble the entire matrix onto first processor. */
998     Mat         A;
999     Mat_SeqBAIJ *Aloc;
1000     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1001     MatScalar   *a;
1002 
1003     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1004     /* Perhaps this should be the type of mat? */
1005     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1006     if (!rank) {
1007       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1008     } else {
1009       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1010     }
1011     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1012     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1013     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1014     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1015 
1016     /* copy over the A part */
1017     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1018     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1019     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1020 
1021     for (i=0; i<mbs; i++) {
1022       rvals[0] = bs*(baij->rstartbs + i);
1023       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1024       for (j=ai[i]; j<ai[i+1]; j++) {
1025         col = (baij->cstartbs+aj[j])*bs;
1026         for (k=0; k<bs; k++) {
1027           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1028           col++; a += bs;
1029         }
1030       }
1031     }
1032     /* copy over the B part */
1033     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1034     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1035     for (i=0; i<mbs; i++) {
1036       rvals[0] = bs*(baij->rstartbs + i);
1037       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1038       for (j=ai[i]; j<ai[i+1]; j++) {
1039         col = baij->garray[aj[j]]*bs;
1040         for (k=0; k<bs; k++) {
1041           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1042           col++; a += bs;
1043         }
1044       }
1045     }
1046     ierr = PetscFree(rvals);CHKERRQ(ierr);
1047     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1048     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1049     /*
1050        Everyone has to call to draw the matrix since the graphics waits are
1051        synchronized across all processors that share the PetscDraw object
1052     */
1053     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1054     if (!rank) {
1055       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1056       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1057       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1058       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1059     }
1060     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1061     ierr = MatDestroy(&A);CHKERRQ(ierr);
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1068 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1069 {
1070   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1071   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1072   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1073   PetscErrorCode ierr;
1074   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1075   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1076   int            fd;
1077   PetscScalar    *column_values;
1078   FILE           *file;
1079   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1080   PetscInt       message_count,flowcontrolcount;
1081 
1082   PetscFunctionBegin;
1083   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1084   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1085   nz   = bs2*(A->nz + B->nz);
1086   rlen = mat->rmap->n;
1087   if (!rank) {
1088     header[0] = MAT_FILE_CLASSID;
1089     header[1] = mat->rmap->N;
1090     header[2] = mat->cmap->N;
1091     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1092     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1093     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1094     /* get largest number of rows any processor has */
1095     range = mat->rmap->range;
1096     for (i=1; i<size; i++) {
1097       rlen = PetscMax(rlen,range[i+1] - range[i]);
1098     }
1099   } else {
1100     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1101   }
1102 
1103   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1104   /* compute lengths of each row  */
1105   for (i=0; i<a->mbs; i++) {
1106     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1107   }
1108   /* store the row lengths to the file */
1109   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1110   if (!rank) {
1111     MPI_Status status;
1112     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1113     rlen  = (range[1] - range[0])/bs;
1114     for (i=0; i<rlen; i++) {
1115       for (j=0; j<bs; j++) {
1116         row_lens[i*bs+j] = bs*crow_lens[i];
1117       }
1118     }
1119     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1120     for (i=1; i<size; i++) {
1121       rlen = (range[i+1] - range[i])/bs;
1122       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1123       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1124       for (k=0; k<rlen; k++) {
1125         for (j=0; j<bs; j++) {
1126           row_lens[k*bs+j] = bs*crow_lens[k];
1127         }
1128       }
1129       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1130     }
1131     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1132     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1133   } else {
1134     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1135     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1136     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1137   }
1138   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1139 
1140   /* load up the local column indices. Store them for every point row, not just once per block row, since process 0 does not
1141      have the information needed to expand a block row into its point rows. This does require more communication, but still
1142      no more than the communication needed for the nonzero values */
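  /*
     Layout sketch: for each block row, the point-column indices of the off-diagonal blocks
     that precede this process's diagonal block columns come first, then the diagonal-part
     blocks (shifted by cstart), then the remaining off-diagonal blocks, with every block
     expanded into bs consecutive point columns; that point-row list is then replicated bs
     times, once per point row of the block row.
  */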
1143   nzmax = nz; /* space that the largest processor needs */
1144   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1145   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1146   cnt  = 0;
1147   for (i=0; i<a->mbs; i++) {
1148     pcnt = cnt;
1149     for (j=B->i[i]; j<B->i[i+1]; j++) {
1150       if ( (col = garray[B->j[j]]) > cstart) break;
1151       for (l=0; l<bs; l++) {
1152         column_indices[cnt++] = bs*col+l;
1153       }
1154     }
1155     for (k=A->i[i]; k<A->i[i+1]; k++) {
1156       for (l=0; l<bs; l++) {
1157         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1158       }
1159     }
1160     for (; j<B->i[i+1]; j++) {
1161       for (l=0; l<bs; l++) {
1162         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1163       }
1164     }
1165     len = cnt - pcnt;
1166     for (k=1; k<bs; k++) {
1167       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1168       cnt += len;
1169     }
1170   }
1171   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1172 
1173   /* store the columns to the file */
1174   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1175   if (!rank) {
1176     MPI_Status status;
1177     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     for (i=1; i<size; i++) {
1179       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1180       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1181       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1182       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     }
1184     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1185   } else {
1186     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1187     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1188     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1189     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1190   }
1191   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1192 
1193   /* load up the numerical values */
1194   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1195   cnt = 0;
1196   for (i=0; i<a->mbs; i++) {
1197     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1198     for (j=B->i[i]; j<B->i[i+1]; j++) {
1199       if ( garray[B->j[j]] > cstart) break;
1200       for (l=0; l<bs; l++) {
1201         for (ll=0; ll<bs; ll++) {
1202           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1203         }
1204       }
1205       cnt += bs;
1206     }
1207     for (k=A->i[i]; k<A->i[i+1]; k++) {
1208       for (l=0; l<bs; l++) {
1209         for (ll=0; ll<bs; ll++) {
1210           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1211         }
1212       }
1213       cnt += bs;
1214     }
1215     for (; j<B->i[i+1]; j++) {
1216       for (l=0; l<bs; l++) {
1217         for (ll=0; ll<bs; ll++) {
1218           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1219         }
1220       }
1221       cnt += bs;
1222     }
1223     cnt += (bs-1)*rlen;
1224   }
1225   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1226 
1227   /* store the column values to the file */
1228   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1229   if (!rank) {
1230     MPI_Status status;
1231     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1232     for (i=1; i<size; i++) {
1233       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1234       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1235       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1236       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1237     }
1238     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1239   } else {
1240     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1241     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1242     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1243     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1244   }
1245   ierr = PetscFree(column_values);CHKERRQ(ierr);
1246 
1247   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1248   if (file) {
1249     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_MPIBAIJ"
1256 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscBool      iascii,isdraw,issocket,isbinary;
1260 
1261   PetscFunctionBegin;
1262   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1263   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1264   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1265   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1266   if (iascii || isdraw || issocket) {
1267     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1268   } else if (isbinary) {
1269     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1270   } else {
1271     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1278 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1279 {
1280   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284 #if defined(PETSC_USE_LOG)
1285   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1286 #endif
1287   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1288   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1289   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1290   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1291 #if defined (PETSC_USE_CTABLE)
1292   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1293 #else
1294   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1295 #endif
1296   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1297   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1298   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1299   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1300   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1301   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1302   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1303   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1304 
1305   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatMult_MPIBAIJ"
1320 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1321 {
1322   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324   PetscInt       nt;
1325 
1326   PetscFunctionBegin;
1327   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1328   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1329   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1330   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1331   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1332   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1333   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
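/*
   Usage sketch (hypothetical caller): the VecScatterBegin()/VecScatterEnd() pair above
   overlaps communication of the off-process entries of xx with the local product against
   the diagonal block, so a call such as

      ierr = MatMult(A,x,y);CHKERRQ(ierr);

   has the ghost values of x in flight while a->A is applied; the off-diagonal
   contribution is added once the scatter completes.
*/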
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1340 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1341 {
1342   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1347   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1348   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1349   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1355 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1356 {
1357   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359   PetscBool      merged;
1360 
1361   PetscFunctionBegin;
1362   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1363   /* do nondiagonal part */
1364   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1365   if (!merged) {
1366     /* send it on its way */
1367     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1368     /* do local part */
1369     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1370     /* receive remote parts: note this assumes the values are not actually */
1371     /* inserted in yy until the next line */
1372     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373   } else {
1374     /* do local part */
1375     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1376     /* send it on its way */
1377     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378     /* values actually were received in the Begin() but we need to call this nop */
1379     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1386 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1387 {
1388   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1389   PetscErrorCode ierr;
1390 
1391   PetscFunctionBegin;
1392   /* do nondiagonal part */
1393   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1394   /* send it on its way */
1395   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1396   /* do local part */
1397   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1398   /* receive remote parts: note this assumes the values are not actually */
1399   /* inserted into zz until the VecScatterEnd() below, which is true for the */
1400   /* current scatter implementation but may not always hold. */
1401   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 /*
1406   This only works correctly for square matrices where the subblock A->A is the
1407    diagonal block
1408 */
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1411 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1418   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatScale_MPIBAIJ"
1424 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1425 {
1426   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1427   PetscErrorCode ierr;
1428 
1429   PetscFunctionBegin;
1430   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1431   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1437 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1438 {
1439   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1440   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1441   PetscErrorCode ierr;
1442   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1443   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1444   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1445 
1446   PetscFunctionBegin;
1447   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1448   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1449   mat->getrowactive = PETSC_TRUE;
1450 
1451   if (!mat->rowvalues && (idx || v)) {
1452     /*
1453         allocate enough space to hold information from the longest row.
1454     */
1455     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1456     PetscInt     max = 1,mbs = mat->mbs,tmp;
1457     for (i=0; i<mbs; i++) {
1458       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1459       if (max < tmp) { max = tmp; }
1460     }
1461     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1462   }
1463   lrow = row - brstart;
1464 
1465   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1466   if (!v)   {pvA = 0; pvB = 0;}
1467   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1468   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1469   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1470   nztot = nzA + nzB;
1471 
1472   cmap  = mat->garray;
1473   if (v  || idx) {
1474     if (nztot) {
1475       /* Sort by increasing column numbers, assuming A and B already sorted */
1476       PetscInt imark = -1;
1477       if (v) {
1478         *v = v_p = mat->rowvalues;
1479         for (i=0; i<nzB; i++) {
1480           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1481           else break;
1482         }
1483         imark = i;
1484         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1485         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1486       }
1487       if (idx) {
1488         *idx = idx_p = mat->rowindices;
1489         if (imark > -1) {
1490           for (i=0; i<imark; i++) {
1491             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1492           }
1493         } else {
1494           for (i=0; i<nzB; i++) {
1495             if (cmap[cworkB[i]/bs] < cstart)
1496               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1497             else break;
1498           }
1499           imark = i;
1500         }
1501         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1502         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1503       }
1504     } else {
1505       if (idx) *idx = 0;
1506       if (v)   *v   = 0;
1507     }
1508   }
1509   *nz = nztot;
1510   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1511   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
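/*
   Usage sketch (hypothetical caller): MatGetRow() on an MPIBAIJ matrix may only be
   called for locally owned rows and must be paired with MatRestoreRow(), e.g.

      PetscInt          row,ncols,rstart,rend;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... inspect the row; column indices are global and sorted ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/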
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1517 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1518 {
1519   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1520 
1521   PetscFunctionBegin;
1522   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1523   baij->getrowactive = PETSC_FALSE;
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1529 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1530 {
1531   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1536   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1542 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1543 {
1544   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1545   Mat            A = a->A,B = a->B;
1546   PetscErrorCode ierr;
1547   PetscReal      isend[5],irecv[5];
1548 
1549   PetscFunctionBegin;
1550   info->block_size     = (PetscReal)matin->rmap->bs;
1551   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1552   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1553   isend[3] = info->memory;  isend[4] = info->mallocs;
1554   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1555   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1556   isend[3] += info->memory;  isend[4] += info->mallocs;
1557   if (flag == MAT_LOCAL) {
1558     info->nz_used      = isend[0];
1559     info->nz_allocated = isend[1];
1560     info->nz_unneeded  = isend[2];
1561     info->memory       = isend[3];
1562     info->mallocs      = isend[4];
1563   } else if (flag == MAT_GLOBAL_MAX) {
1564     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1565     info->nz_used      = irecv[0];
1566     info->nz_allocated = irecv[1];
1567     info->nz_unneeded  = irecv[2];
1568     info->memory       = irecv[3];
1569     info->mallocs      = irecv[4];
1570   } else if (flag == MAT_GLOBAL_SUM) {
1571     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1572     info->nz_used      = irecv[0];
1573     info->nz_allocated = irecv[1];
1574     info->nz_unneeded  = irecv[2];
1575     info->memory       = irecv[3];
1576     info->mallocs      = irecv[4];
1577   } else {
1578     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1579   }
1580   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1581   info->fill_ratio_needed = 0;
1582   info->factor_mallocs    = 0;
1583   PetscFunctionReturn(0);
1584 }
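/*
   Usage sketch (hypothetical caller): global statistics are obtained with

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);

   MAT_LOCAL returns this process's counts (diagonal plus off-diagonal block), while
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce them over the communicator as above.
*/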
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1588 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1589 {
1590   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1591   PetscErrorCode ierr;
1592 
1593   PetscFunctionBegin;
1594   switch (op) {
1595   case MAT_NEW_NONZERO_LOCATIONS:
1596   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1597   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1598   case MAT_KEEP_NONZERO_PATTERN:
1599   case MAT_NEW_NONZERO_LOCATION_ERR:
1600     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1601     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1602     break;
1603   case MAT_ROW_ORIENTED:
1604     a->roworiented = flg;
1605     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1606     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1607     break;
1608   case MAT_NEW_DIAGONALS:
1609     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1610     break;
1611   case MAT_IGNORE_OFF_PROC_ENTRIES:
1612     a->donotstash = flg;
1613     break;
1614   case MAT_USE_HASH_TABLE:
1615     a->ht_flag = flg;
1616     break;
1617   case MAT_SYMMETRIC:
1618   case MAT_STRUCTURALLY_SYMMETRIC:
1619   case MAT_HERMITIAN:
1620   case MAT_SYMMETRY_ETERNAL:
1621     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1622     break;
1623   default:
1624     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1625   }
1626   PetscFunctionReturn(0);
1627 }
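/*
   Usage sketch (hypothetical caller): the options handled above are typically set
   before assembly, for example

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   Symmetry-related options are forwarded only to the diagonal block, since the
   off-diagonal block of a parallel matrix carries no symmetry information on its own.
*/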
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1631 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1632 {
1633   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1634   Mat_SeqBAIJ    *Aloc;
1635   Mat            B;
1636   PetscErrorCode ierr;
1637   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1638   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1639   MatScalar      *a;
1640 
1641   PetscFunctionBegin;
1642   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1643   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1644     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1645     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1646     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1647     /* Do not know preallocation information, but must set block size */
1648     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1649   } else {
1650     B = *matout;
1651   }
1652 
1653   /* copy over the A part */
1654   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1655   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1656   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1657 
1658   for (i=0; i<mbs; i++) {
1659     rvals[0] = bs*(baij->rstartbs + i);
1660     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1661     for (j=ai[i]; j<ai[i+1]; j++) {
1662       col = (baij->cstartbs+aj[j])*bs;
1663       for (k=0; k<bs; k++) {
1664         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1665         col++; a += bs;
1666       }
1667     }
1668   }
1669   /* copy over the B part */
1670   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1671   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1672   for (i=0; i<mbs; i++) {
1673     rvals[0] = bs*(baij->rstartbs + i);
1674     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1675     for (j=ai[i]; j<ai[i+1]; j++) {
1676       col = baij->garray[aj[j]]*bs;
1677       for (k=0; k<bs; k++) {
1678         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1679         col++; a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1688     *matout = B;
1689   } else {
1690     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1698 {
1699   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1700   Mat            a = baij->A,b = baij->B;
1701   PetscErrorCode ierr;
1702   PetscInt       s1,s2,s3;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1706   if (rr) {
1707     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1708     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1709     /* Overlap communication with computation. */
1710     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   }
1712   if (ll) {
1713     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1714     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1715     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1716   }
1717   /* scale  the diagonal block */
1718   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1719 
1720   if (rr) {
1721     /* Do a scatter end and then right scale the off-diagonal block */
1722     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1723     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1724   }
1725 
1726   PetscFunctionReturn(0);
1727 }
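/*
   The routine above computes mat = diag(ll) mat diag(rr); the scatter of rr is
   started before the diagonal block is scaled so communication overlaps with
   computation.  Usage sketch (hypothetical caller):

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   where l is distributed like the rows of A, r like its columns, and either
   argument may be PETSC_NULL to skip that side of the scaling.
*/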
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1731 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1732 {
1733   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1734   PetscErrorCode    ierr;
1735   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1736   PetscInt          i,*owners = A->rmap->range;
1737   PetscInt          *nprocs,j,idx,nsends,row;
1738   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1739   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1740   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1741   MPI_Comm          comm = ((PetscObject)A)->comm;
1742   MPI_Request       *send_waits,*recv_waits;
1743   MPI_Status        recv_status,*send_status;
1744   const PetscScalar *xx;
1745   PetscScalar       *bb;
1746 #if defined(PETSC_DEBUG)
1747   PetscBool         found = PETSC_FALSE;
1748 #endif
1749 
1750   PetscFunctionBegin;
1751   /*  first count number of contributors to each processor */
1752   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1753   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1755   j = 0;
1756   for (i=0; i<N; i++) {
1757     if (lastidx > (idx = rows[i])) j = 0;
1758     lastidx = idx;
1759     for (; j<size; j++) {
1760       if (idx >= owners[j] && idx < owners[j+1]) {
1761         nprocs[2*j]++;
1762         nprocs[2*j+1] = 1;
1763         owner[i] = j;
1764 #if defined(PETSC_DEBUG)
1765         found = PETSC_TRUE;
1766 #endif
1767         break;
1768       }
1769     }
1770 #if defined(PETSC_DEBUG)
1771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1772     found = PETSC_FALSE;
1773 #endif
1774   }
1775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1776 
1777   if (A->nooffproczerorows) {
1778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(mat,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but attempted to zero a row owned by another process");
1779     nrecvs = nsends;
1780     nmax   = N;
1781   } else {
1782     /* inform other processors of number of messages and max length*/
1783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1784   }
1785 
1786   /* post receives:   */
1787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1789   for (i=0; i<nrecvs; i++) {
1790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1791   }
1792 
1793   /* do sends:
1794      1) starts[i] gives the starting index in svalues for stuff going to
1795      the ith processor
1796   */
1797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1800   starts[0]  = 0;
1801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1802   for (i=0; i<N; i++) {
1803     svalues[starts[owner[i]]++] = rows[i];
1804   }
1805 
1806   starts[0] = 0;
1807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1808   count = 0;
1809   for (i=0; i<size; i++) {
1810     if (nprocs[2*i+1]) {
1811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1812     }
1813   }
1814   ierr = PetscFree(starts);CHKERRQ(ierr);
1815 
1816   base = owners[rank];
1817 
1818   /*  wait on receives */
1819   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1820   count  = nrecvs;
1821   slen = 0;
1822   while (count) {
1823     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1824     /* unpack receives into our local space */
1825     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1826     source[imdex]  = recv_status.MPI_SOURCE;
1827     lens[imdex]    = n;
1828     slen          += n;
1829     count--;
1830   }
1831   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1832 
1833   /* move the data into the send scatter */
1834   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1835   count = 0;
1836   for (i=0; i<nrecvs; i++) {
1837     values = rvalues + i*nmax;
1838     for (j=0; j<lens[i]; j++) {
1839       lrows[count++] = values[j] - base;
1840     }
1841   }
1842   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1843   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1844   ierr = PetscFree(owner);CHKERRQ(ierr);
1845   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1846 
1847   /* fix right hand side if needed */
1848   if (x && b) {
1849     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1850     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1851     for (i=0; i<slen; i++) {
1852       bb[lrows[i]] = diag*xx[lrows[i]];
1853     }
1854     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1856   }
1857 
1858   /* actually zap the local rows */
1859   /*
1860         Zero the required rows. If the "diagonal block" of the matrix
1861      is square and the user wishes to set the diagonal, we use separate
1862      code so that MatSetValues() is not called for each diagonal entry,
1863      which would allocate new memory repeatedly and slow things down.
1864 
1865   */
1866   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1867   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1868   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1869     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1870   } else if (diag != 0.0) {
1871     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1872     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1873        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1874     for (i=0; i<slen; i++) {
1875       row  = lrows[i] + rstart_bs;
1876       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1877     }
1878     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   } else {
1881     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1882   }
1883 
1884   ierr = PetscFree(lrows);CHKERRQ(ierr);
1885 
1886   /* wait on sends */
1887   if (nsends) {
1888     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1889     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1890     ierr = PetscFree(send_status);CHKERRQ(ierr);
1891   }
1892   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1893   ierr = PetscFree(svalues);CHKERRQ(ierr);
1894 
1895   PetscFunctionReturn(0);
1896 }
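/*
   Usage sketch (hypothetical caller; nbc and bcrows are made-up names): the rows passed
   in are global indices and may be owned by other processes; the routine above routes
   them to their owners.  For a Dirichlet-type boundary condition one might call

      ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);

   which zeros the listed rows, places 1.0 on the diagonal when the diagonal block is
   square, and fixes the right-hand side so that b[i] = diag*x[i] for each zeroed local row.
*/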
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1901 {
1902   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatEqual_MPIBAIJ"
1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1915 {
1916   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1917   Mat            a,b,c,d;
1918   PetscBool      flg;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   a = matA->A; b = matA->B;
1923   c = matB->A; d = matB->B;
1924 
1925   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1926   if (flg) {
1927     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1928   }
1929   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatCopy_MPIBAIJ"
1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1936 {
1937   PetscErrorCode ierr;
1938   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1939   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1940 
1941   PetscFunctionBegin;
1942   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1943   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1944     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1945   } else {
1946     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1947     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1954 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1955 {
1956   PetscErrorCode ierr;
1957 
1958   PetscFunctionBegin;
1959   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1966 {
1967   PetscErrorCode ierr;
1968   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1969   PetscBLASInt   bnz,one=1;
1970   Mat_SeqBAIJ    *x,*y;
1971 
1972   PetscFunctionBegin;
1973   if (str == SAME_NONZERO_PATTERN) {
1974     PetscScalar alpha = a;
1975     x = (Mat_SeqBAIJ *)xx->A->data;
1976     y = (Mat_SeqBAIJ *)yy->A->data;
1977     bnz = PetscBLASIntCast(x->nz);
1978     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1979     x = (Mat_SeqBAIJ *)xx->B->data;
1980     y = (Mat_SeqBAIJ *)yy->B->data;
1981     bnz = PetscBLASIntCast(x->nz);
1982     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1983   } else {
1984     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1985   }
1986   PetscFunctionReturn(0);
1987 }
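/*
   Usage sketch (hypothetical caller): when Y and X are known to share a nonzero
   pattern the fast BLAS path above is selected with

      ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   computing Y = Y + alpha*X directly on the stored value arrays of both the diagonal
   and off-diagonal blocks; any other MatStructure value falls back to MatAXPY_Basic().
*/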
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1991 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1992 {
1993   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1998   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2004 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2005 {
2006   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2011   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2017 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2018 {
2019   PetscErrorCode ierr;
2020   IS             iscol_local;
2021   PetscInt       csize;
2022 
2023   PetscFunctionBegin;
2024   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2025   if (call == MAT_REUSE_MATRIX) {
2026     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2027     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2028   } else {
2029     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2030   }
2031   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2032   if (call == MAT_INITIAL_MATRIX) {
2033     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2034     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2035   }
2036   PetscFunctionReturn(0);
2037 }
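/*
   Usage sketch (hypothetical caller): the first extraction creates the submatrix and
   caches the gathered column IS on it; later extractions with the same index sets can
   reuse that work, e.g.

      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/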
2038 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2041 /*
2042   Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
2043   process, and then the end result by concatenating the local matrices.
2044   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2045 */
2046 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2047 {
2048   PetscErrorCode ierr;
2049   PetscMPIInt    rank,size;
2050   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2051   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2052   Mat            M,Mreuse;
2053   MatScalar      *vwork,*aa;
2054   MPI_Comm       comm = ((PetscObject)mat)->comm;
2055   IS             isrow_new, iscol_new;
2056   PetscBool      idflag,allrows, allcols;
2057   Mat_SeqBAIJ    *aij;
2058 
2059 
2060   PetscFunctionBegin;
2061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2063   /* The compression and expansion should be avoided. It does not report
2064      errors and might change the indices, hence it is buggy */
2065   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2066   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2067 
2068   /* Check for special case: each processor gets entire matrix columns */
2069   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2070   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2071   if (idflag && ncol == mat->cmap->N) {
2072     allcols = PETSC_TRUE;
2073   } else {
2074     allcols = PETSC_FALSE;
2075   }
2076 
2077   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2078   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2079   if (idflag && nrow == mat->rmap->N) {
2080     allrows = PETSC_TRUE;
2081   } else {
2082     allrows = PETSC_FALSE;
2083   }
2084   if (call ==  MAT_REUSE_MATRIX) {
2085     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2086     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2087     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2088   } else {
2089     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2090   }
2091   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2092   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2093   /*
2094       m - number of local rows
2095       n - number of columns (same on all processors)
2096       rstart - first row in new global matrix generated
2097   */
2098   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2099   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2100   m    = m/bs;
2101   n    = n/bs;
2102 
2103   if (call == MAT_INITIAL_MATRIX) {
2104     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2105     ii  = aij->i;
2106     jj  = aij->j;
2107 
2108     /*
2109         Determine the number of non-zeros in the diagonal and off-diagonal
2110         portions of the matrix in order to do correct preallocation
2111     */
2112 
2113     /* first get start and end of "diagonal" columns */
2114     if (csize == PETSC_DECIDE) {
2115       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2116       if (mglobal == n*bs) { /* square matrix */
2117         nlocal = m;
2118       } else {
2119         nlocal = n/size + ((n % size) > rank);
2120       }
2121     } else {
2122       nlocal = csize/bs;
2123     }
2124     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2125     rstart = rend - nlocal;
2126     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2127 
2128     /* next, compute all the lengths */
2129     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2130     olens = dlens + m;
2131     for (i=0; i<m; i++) {
2132       jend = ii[i+1] - ii[i];
2133       olen = 0;
2134       dlen = 0;
2135       for (j=0; j<jend; j++) {
2136         if (*jj < rstart || *jj >= rend) olen++;
2137         else dlen++;
2138         jj++;
2139       }
2140       olens[i] = olen;
2141       dlens[i] = dlen;
2142     }
2143     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2144     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2145     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2146     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2147     ierr = PetscFree(dlens);CHKERRQ(ierr);
2148   } else {
2149     PetscInt ml,nl;
2150 
2151     M = *newmat;
2152     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2153     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2154     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2155     /*
2156          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2157        rather than the slower MatSetValues().
2158     */
2159     M->was_assembled = PETSC_TRUE;
2160     M->assembled     = PETSC_FALSE;
2161   }
2162   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2163   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2164   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2165   ii  = aij->i;
2166   jj  = aij->j;
2167   aa  = aij->a;
2168   for (i=0; i<m; i++) {
2169     row   = rstart/bs + i;
2170     nz    = ii[i+1] - ii[i];
2171     cwork = jj;     jj += nz;
2172     vwork = aa;     aa += nz*bs*bs;
2173     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2174   }
2175 
2176   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   *newmat = M;
2179 
2180   /* save submatrix used in processor for next request */
2181   if (call ==  MAT_INITIAL_MATRIX) {
2182     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2183     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2184   }
2185 
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatPermute_MPIBAIJ"
2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2192 {
2193   MPI_Comm       comm,pcomm;
2194   PetscInt       first,rlocal_size,clocal_size,nrows;
2195   const PetscInt *rows;
2196   PetscMPIInt    size;
2197   IS             crowp,growp,irowp,lrowp,lcolp;
2198   PetscErrorCode ierr;
2199 
2200   PetscFunctionBegin;
2201   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2202   /* make a collective version of 'rowp' */
2203   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2204   if (pcomm==comm) {
2205     crowp = rowp;
2206   } else {
2207     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2208     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2209     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2210     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2211   }
2212   /* collect the global row permutation and invert it */
2213   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2214   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2215   if (pcomm!=comm) {
2216     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2217   }
2218   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2219   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2220   /* get the local target indices */
2221   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2223   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2224   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2225   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2226   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2227   /* the column permutation is much simpler;
2228      gather a full copy of 'colp' onto each process */
2229   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2230   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2231   if (size==1) {
2232     lcolp = colp;
2233   } else {
2234     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2235   }
2236   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2237   /* now we just get the submatrix */
2238   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2239   if (size>1) {
2240     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2241   }
2242   /* clean up */
2243   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2249 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2250 {
2251   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2252   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2253 
2254   PetscFunctionBegin;
2255   if (nghosts) { *nghosts = B->nbs;}
2256   if (ghosts) {*ghosts = baij->garray;}
2257   PetscFunctionReturn(0);
2258 }
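/*
   The ghost list returned above is baij->garray: the global block-column indices of
   the off-diagonal part, one entry per block column of B.  Usage sketch (hypothetical
   caller):

      PetscInt       nghost;
      const PetscInt *ghost;
      ierr = MatGetGhosts(A,&nghost,&ghost);CHKERRQ(ierr);

   Note these are block indices, not scaled by the block size.
*/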
2259 
2260 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2264 /*
2265     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2266 */
2267 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2268 {
2269   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2270   PetscErrorCode        ierr;
2271   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2272   PetscInt              bs,i,n,nrows,j,k,m,ncols,col;
2273   const PetscInt        *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2274   PetscInt              nis = iscoloring->n,nctot,*cols;
2275   PetscInt              *rowhit,M,cstart,cend,colb;
2276   PetscInt              *columnsforrow,l;
2277   IS                    *isa;
2278   PetscBool              done,flg;
2279   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2280   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2281 
2282   PetscFunctionBegin;
2283   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2284   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2285 
2286   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2287   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2288   M                = mat->rmap->n/bs;
2289   cstart           = mat->cmap->rstart/bs;
2290   cend             = mat->cmap->rend/bs;
2291   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2292   c->N             = mat->cmap->N/bs;
2293   c->m             = mat->rmap->n/bs;
2294   c->rstart        = mat->rmap->rstart/bs;
2295 
2296   c->ncolors       = nis;
2297   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2298   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2299   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2300   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2301   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2302   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2303 
2304   /* Allow access to data structures of local part of matrix */
2305   if (!baij->colmap) {
2306     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2307   }
2308   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2309   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2310 
2311   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2312   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2313 
2314   for (i=0; i<nis; i++) {
2315     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2316     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2317     c->ncolumns[i] = n;
2318     if (n) {
2319       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2320       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2321       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2322     } else {
2323       c->columns[i]  = 0;
2324     }
2325 
2326     if (ctype == IS_COLORING_GLOBAL) {
2327       /* Determine the total (parallel) number of columns of this color */
2328       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2329       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2330 
2331       nn   = PetscMPIIntCast(n);
2332       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2333       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2334       if (!nctot) {
2335         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2336       }
2337 
2338       disp[0] = 0;
2339       for (j=1; j<size; j++) {
2340         disp[j] = disp[j-1] + ncolsonproc[j-1];
2341       }
2342 
2343       /* Get complete list of columns for color on each processor */
2344       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2345       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2346       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2347     } else if (ctype == IS_COLORING_GHOSTED) {
2348       /* Determine local number of columns of this color on this process, including ghost points */
2349       nctot = n;
2350       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2351       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2352     } else {
2353       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2354     }
2355 
2356     /*
2357        Mark all rows affected by these columns
2358     */
2359     /* Temporary option to allow for debugging/testing */
2360     flg  = PETSC_FALSE;
2361     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2362     if (!flg) {/*-----------------------------------------------------------------------------*/
2363       /* crude, fast version */
2364       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2365       /* loop over columns*/
2366       for (j=0; j<nctot; j++) {
2367         if (ctype == IS_COLORING_GHOSTED) {
2368           col = ltog[cols[j]];
2369         } else {
2370           col  = cols[j];
2371         }
2372         if (col >= cstart && col < cend) {
2373           /* column is in diagonal block of matrix */
2374           rows = A_cj + A_ci[col-cstart];
2375           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2376         } else {
2377 #if defined (PETSC_USE_CTABLE)
2378           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2379           colb --;
2380 #else
2381           colb = baij->colmap[col] - 1;
2382 #endif
2383           if (colb == -1) {
2384             m = 0;
2385           } else {
2386             colb = colb/bs;
2387             rows = B_cj + B_ci[colb];
2388             m    = B_ci[colb+1] - B_ci[colb];
2389           }
2390         }
2391         /* loop over the rows of this column, marking them in rowhit */
2392         for (k=0; k<m; k++) {
2393           rowhit[*rows++] = col + 1;
2394         }
2395       }
2396 
2397       /* count the number of hits */
2398       nrows = 0;
2399       for (j=0; j<M; j++) {
2400         if (rowhit[j]) nrows++;
2401       }
2402       c->nrows[i]         = nrows;
2403       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2404       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2405       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2406       nrows = 0;
2407       for (j=0; j<M; j++) {
2408         if (rowhit[j]) {
2409           c->rows[i][nrows]           = j;
2410           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2411           nrows++;
2412         }
2413       }
2414     } else {/*-------------------------------------------------------------------------------*/
2415       /* slow version, using rowhit as a linked list */
2416       PetscInt currentcol,fm,mfm;
2417       rowhit[M] = M;
2418       nrows     = 0;
2419       /* loop over columns*/
2420       for (j=0; j<nctot; j++) {
2421         if (ctype == IS_COLORING_GHOSTED) {
2422           col = ltog[cols[j]];
2423         } else {
2424           col  = cols[j];
2425         }
2426         if (col >= cstart && col < cend) {
2427           /* column is in diagonal block of matrix */
2428           rows = A_cj + A_ci[col-cstart];
2429           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2430         } else {
2431 #if defined (PETSC_USE_CTABLE)
2432           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2433           colb --;
2434 #else
2435           colb = baij->colmap[col] - 1;
2436 #endif
2437           if (colb == -1) {
2438             m = 0;
2439           } else {
2440             colb = colb/bs;
2441             rows = B_cj + B_ci[colb];
2442             m    = B_ci[colb+1] - B_ci[colb];
2443           }
2444         }
2445 
2446         /* loop over the rows of this column, marking them in rowhit */
2447         fm    = M; /* fm points to first entry in linked list */
2448         for (k=0; k<m; k++) {
2449           currentcol = *rows++;
2450           /* is it already in the list? */
2451           do {
2452             mfm  = fm;
2453             fm   = rowhit[fm];
2454           } while (fm < currentcol);
2455           /* not in list so add it */
2456           if (fm != currentcol) {
2457             nrows++;
2458             columnsforrow[currentcol] = col;
2459             /* next three lines insert new entry into linked list */
2460             rowhit[mfm]               = currentcol;
2461             rowhit[currentcol]        = fm;
2462             fm                        = currentcol;
2463             /* fm points to present position in list since we know the columns are sorted */
2464           } else {
2465             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2466           }
2467         }
2468       }
2469       c->nrows[i]         = nrows;
2470       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2471       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2472       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2473       /* now store the linked list of rows into c->rows[i] */
2474       nrows = 0;
2475       fm    = rowhit[M];
2476       do {
2477         c->rows[i][nrows]            = fm;
2478         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2479         fm                           = rowhit[fm];
2480       } while (fm < M);
2481     } /* ---------------------------------------------------------------------------------------*/
2482     ierr = PetscFree(cols);CHKERRQ(ierr);
2483   }
2484 
2485   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2486   /*
2487        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2488   */
2489   if (ctype == IS_COLORING_GLOBAL) {
2490     PetscInt *garray;
2491     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2492     for (i=0; i<baij->B->cmap->n/bs; i++) {
2493       for (j=0; j<bs; j++) {
2494         garray[i*bs+j] = bs*baij->garray[i]+j;
2495       }
2496     }
2497     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2498     ierr = PetscFree(garray);CHKERRQ(ierr);
2499     CHKMEMQ;
2500     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2501     for (k=0; k<c->ncolors; k++) {
2502       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2503       for (l=0; l<c->nrows[k]; l++) {
2504         col = c->columnsforrow[k][l];
2505         if (col >= cstart && col < cend) {
2506           /* column is in diagonal block of matrix */
2507           colb = col - cstart;
2508         } else {
2509           /* column  is in "off-processor" part */
2510 #if defined (PETSC_USE_CTABLE)
2511           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2512           colb --;
2513 #else
2514           colb = baij->colmap[col] - 1;
2515 #endif
2516           colb = colb/bs;
2517           colb += cend - cstart;
2518         }
2519         c->vscaleforrow[k][l] = colb;
2520       }
2521     }
2522   } else if (ctype == IS_COLORING_GHOSTED) {
2523     /* Get gtol mapping */
2524     PetscInt N = mat->cmap->N, *gtol;
2525     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2526     for (i=0; i<N; i++) gtol[i] = -1;
2527     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2528 
2529     c->vscale = 0; /* will be created in MatFDColoringApply() */
2530     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2531     for (k=0; k<c->ncolors; k++) {
2532       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2533       for (l=0; l<c->nrows[k]; l++) {
2534         col = c->columnsforrow[k][l];      /* global column index */
2535         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2536       }
2537     }
2538     ierr = PetscFree(gtol);CHKERRQ(ierr);
2539   }
2540   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2541 
2542   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2543   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2544   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2545   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2546   CHKMEMQ;
2547   PetscFunctionReturn(0);
2548 }
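/*
   Usage sketch (hypothetical caller): this routine is normally reached through the
   generic interface when building a finite-difference Jacobian, e.g.

      ISColoring    iscoloring;
      MatFDColoring fdcoloring;
      ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
      ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
      ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);

   Both IS_COLORING_GLOBAL and IS_COLORING_GHOSTED colorings are handled above.
*/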
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2552 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2553 {
2554   Mat            B;
2555   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2556   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2557   Mat_SeqAIJ     *b;
2558   PetscErrorCode ierr;
2559   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2560   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2561   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2562 
2563   PetscFunctionBegin;
2564   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2565   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2566 
2567   /* ----------------------------------------------------------------
2568      Tell every processor the number of nonzeros per row
2569   */
2570   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2571   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2572     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2573   }
2574   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2575   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2576   displs     = recvcounts + size;
2577   for (i=0; i<size; i++) {
2578     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2579     displs[i]     = A->rmap->range[i]/bs;
2580   }
2581 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2582   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2583 #else
2584   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2585 #endif
2586   /* ---------------------------------------------------------------
2587      Create the sequential AIJ matrix that will hold the global block nonzero structure
2588   */
2589   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2590   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2591   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2592   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2593   b = (Mat_SeqAIJ *)B->data;
2594 
2595   /*--------------------------------------------------------------------
2596     Copy my part of matrix column indices over
2597   */
2598   sendcount  = ad->nz + bd->nz;
2599   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2600   a_jsendbuf = ad->j;
2601   b_jsendbuf = bd->j;
2602   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2603   cnt        = 0;
2604   for (i=0; i<n; i++) {
2605 
2606     /* put in lower diagonal portion */
2607     m = bd->i[i+1] - bd->i[i];
2608     while (m > 0) {
2609       /* is it above diagonal (in bd (compressed) numbering) */
2610       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2611       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2612       m--;
2613     }
2614 
2615     /* put in diagonal portion */
2616     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2617       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2618     }
2619 
2620     /* put in upper diagonal portion */
2621     while (m-- > 0) {
2622       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2623     }
2624   }
2625   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2626 
2627   /*--------------------------------------------------------------------
2628     Gather all column indices to all processors
2629   */
2630   for (i=0; i<size; i++) {
2631     recvcounts[i] = 0;
2632     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2633       recvcounts[i] += lens[j];
2634     }
2635   }
2636   displs[0]  = 0;
2637   for (i=1; i<size; i++) {
2638     displs[i] = displs[i-1] + recvcounts[i-1];
2639   }
2640 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2641   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2642 #else
2643   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2644 #endif
2645   /*--------------------------------------------------------------------
2646     Assemble the matrix into usable form (note the numerical values are not yet set)
2647   */
2648   /* set the b->ilen (length of each row) values */
2649   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2650   /* set the b->i indices */
2651   b->i[0] = 0;
2652   for (i=1; i<=A->rmap->N/bs; i++) {
2653     b->i[i] = b->i[i-1] + lens[i-1];
2654   }
2655   ierr = PetscFree(lens);CHKERRQ(ierr);
2656   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2657   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2658   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2659 
2660   if (A->symmetric) {
2661     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2662   } else if (A->hermitian) {
2663     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2664   } else if (A->structurally_symmetric) {
2665     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2666   }
2667   *newmat = B;
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 #undef __FUNCT__
2672 #define __FUNCT__ "MatSOR_MPIBAIJ"
2673 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2674 {
2675   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2676   PetscErrorCode ierr;
2677   Vec            bb1 = 0;
2678 
2679   PetscFunctionBegin;
2680   if (flag == SOR_APPLY_UPPER) {
2681     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2682     PetscFunctionReturn(0);
2683   }
2684 
2685   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2686     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2687   }
2688 
2689   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2690     if (flag & SOR_ZERO_INITIAL_GUESS) {
2691       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2692       its--;
2693     }
2694 
2695     while (its--) {
2696       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2698 
2699       /* update rhs: bb1 = bb - B*x */
2700       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2701       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2702 
2703       /* local sweep */
2704       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2705     }
2706   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2707     if (flag & SOR_ZERO_INITIAL_GUESS) {
2708       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2709       its--;
2710     }
2711     while (its--) {
2712       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2713       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714 
2715       /* update rhs: bb1 = bb - B*x */
2716       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2717       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2718 
2719       /* local sweep */
2720       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2721     }
2722   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2723     if (flag & SOR_ZERO_INITIAL_GUESS) {
2724       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2725       its--;
2726     }
2727     while (its--) {
2728       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2729       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2730 
2731       /* update rhs: bb1 = bb - B*x */
2732       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2733       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2734 
2735       /* local sweep */
2736       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2737     }
2738   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2739 
2740   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2748 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2749 {
2750   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 
2759 /* -------------------------------------------------------------------*/
2760 static struct _MatOps MatOps_Values = {
2761        MatSetValues_MPIBAIJ,
2762        MatGetRow_MPIBAIJ,
2763        MatRestoreRow_MPIBAIJ,
2764        MatMult_MPIBAIJ,
2765 /* 4*/ MatMultAdd_MPIBAIJ,
2766        MatMultTranspose_MPIBAIJ,
2767        MatMultTransposeAdd_MPIBAIJ,
2768        0,
2769        0,
2770        0,
2771 /*10*/ 0,
2772        0,
2773        0,
2774        MatSOR_MPIBAIJ,
2775        MatTranspose_MPIBAIJ,
2776 /*15*/ MatGetInfo_MPIBAIJ,
2777        MatEqual_MPIBAIJ,
2778        MatGetDiagonal_MPIBAIJ,
2779        MatDiagonalScale_MPIBAIJ,
2780        MatNorm_MPIBAIJ,
2781 /*20*/ MatAssemblyBegin_MPIBAIJ,
2782        MatAssemblyEnd_MPIBAIJ,
2783        MatSetOption_MPIBAIJ,
2784        MatZeroEntries_MPIBAIJ,
2785 /*24*/ MatZeroRows_MPIBAIJ,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*29*/ MatSetUp_MPIBAIJ,
2791        0,
2792        0,
2793        0,
2794        0,
2795 /*34*/ MatDuplicate_MPIBAIJ,
2796        0,
2797        0,
2798        0,
2799        0,
2800 /*39*/ MatAXPY_MPIBAIJ,
2801        MatGetSubMatrices_MPIBAIJ,
2802        MatIncreaseOverlap_MPIBAIJ,
2803        MatGetValues_MPIBAIJ,
2804        MatCopy_MPIBAIJ,
2805 /*44*/ 0,
2806        MatScale_MPIBAIJ,
2807        0,
2808        0,
2809        0,
2810 /*49*/ 0,
2811        0,
2812        0,
2813        0,
2814        0,
2815 /*54*/ MatFDColoringCreate_MPIBAIJ,
2816        0,
2817        MatSetUnfactored_MPIBAIJ,
2818        MatPermute_MPIBAIJ,
2819        MatSetValuesBlocked_MPIBAIJ,
2820 /*59*/ MatGetSubMatrix_MPIBAIJ,
2821        MatDestroy_MPIBAIJ,
2822        MatView_MPIBAIJ,
2823        0,
2824        0,
2825 /*64*/ 0,
2826        0,
2827        0,
2828        0,
2829        0,
2830 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2831        0,
2832        0,
2833        0,
2834        0,
2835 /*74*/ 0,
2836        MatFDColoringApply_BAIJ,
2837        0,
2838        0,
2839        0,
2840 /*79*/ 0,
2841        0,
2842        0,
2843        0,
2844        MatLoad_MPIBAIJ,
2845 /*84*/ 0,
2846        0,
2847        0,
2848        0,
2849        0,
2850 /*89*/ 0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*94*/ 0,
2856        0,
2857        0,
2858        0,
2859        0,
2860 /*99*/ 0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*104*/0,
2866        MatRealPart_MPIBAIJ,
2867        MatImaginaryPart_MPIBAIJ,
2868        0,
2869        0,
2870 /*109*/0,
2871        0,
2872        0,
2873        0,
2874        0,
2875 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2876        0,
2877        MatGetGhosts_MPIBAIJ,
2878        0,
2879        0,
2880 /*119*/0,
2881        0,
2882        0,
2883        0,
2884        0,
2885 /*124*/0,
2886        0,
2887        MatInvertBlockDiagonal_MPIBAIJ
2888 };
2889 
2890 EXTERN_C_BEGIN
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2893 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2894 {
2895   PetscFunctionBegin;
2896   *a = ((Mat_MPIBAIJ *)A->data)->A;
2897   PetscFunctionReturn(0);
2898 }
2899 EXTERN_C_END
2900 
2901 EXTERN_C_BEGIN
2902 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2903 EXTERN_C_END
2904 
2905 EXTERN_C_BEGIN
2906 #undef __FUNCT__
2907 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2908 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2909 {
2910   PetscInt       m,rstart,cstart,cend;
2911   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2912   const PetscInt *JJ=0;
2913   PetscScalar    *values=0;
2914   PetscErrorCode ierr;
2915 
2916   PetscFunctionBegin;
2917   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2918   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2919   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2920   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2921   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2922   m      = B->rmap->n/bs;
2923   rstart = B->rmap->rstart/bs;
2924   cstart = B->cmap->rstart/bs;
2925   cend   = B->cmap->rend/bs;
2926 
2927   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2928   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2929   for (i=0; i<m; i++) {
2930     nz = ii[i+1] - ii[i];
2931     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2932     nz_max = PetscMax(nz_max,nz);
2933     JJ  = jj + ii[i];
2934     for (j=0; j<nz; j++) {
2935       if (*JJ >= cstart) break;
2936       JJ++;
2937     }
2938     d = 0;
2939     for (; j<nz; j++) {
2940       if (*JJ++ >= cend) break;
2941       d++;
2942     }
2943     d_nnz[i] = d;
2944     o_nnz[i] = nz - d;
2945   }
2946   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2947   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2948 
2949   values = (PetscScalar*)V;
2950   if (!values) {
2951     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2952     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2953   }
2954   for (i=0; i<m; i++) {
2955     PetscInt          row    = i + rstart;
2956     PetscInt          ncols  = ii[i+1] - ii[i];
2957     const PetscInt    *icols = jj + ii[i];
2958     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2959     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2960   }
2961 
2962   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2965   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
2968 EXTERN_C_END
2969 
2970 #undef __FUNCT__
2971 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2972 /*@C
2973    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2974    (block compressed row).
2975 
2976    Collective on MPI_Comm
2977 
2978    Input Parameters:
2979 +  A - the matrix
2980 .  bs - the block size
2981 .  i - the indices into j for the start of each local row (starts with zero)
2982 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2983 -  v - optional values in the matrix
2984 
2985    Level: developer
2986 
2987 .keywords: matrix, baij, block, compressed row, sparse, parallel
2988 
2989 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
2990 @*/
2991 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2992 {
2993   PetscErrorCode ierr;
2994 
2995   PetscFunctionBegin;
2996   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2997   PetscValidType(B,1);
2998   PetscValidLogicalCollectiveInt(B,bs,2);
2999   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3000   PetscFunctionReturn(0);
3001 }
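
/*
   A minimal usage sketch for MatMPIBAIJSetPreallocationCSR() (illustrative only: the sizes,
   block size, and index arrays below are assumptions, and error handling is abbreviated).
   The ii[] and jj[] arrays are given in block-row/block-column units, matching the
   implementation above; passing PETSC_NULL for v preallocates and zeros the blocks, and the
   numerical values can then be inserted with MatSetValuesBlocked().

     Mat      A;
     PetscInt bs    = 2;
     PetscInt ii[3] = {0,2,3};      two local block rows: row 0 has 2 blocks, row 1 has 1
     PetscInt jj[3] = {0,1,1};      global block-column indices of those blocks

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);   local sizes in points, not blocks
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
*/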
3002 
3003 EXTERN_C_BEGIN
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3006 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3007 {
3008   Mat_MPIBAIJ    *b;
3009   PetscErrorCode ierr;
3010   PetscInt       i;
3011   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3012 
3013   PetscFunctionBegin;
3014   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3015   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3016 
3017   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3018   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3019   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3020   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3021 
3022   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3023   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3025   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3026   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3027 
3028   if (d_nnz) {
3029     for (i=0; i<B->rmap->n/bs; i++) {
3030       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3031     }
3032   }
3033   if (o_nnz) {
3034     for (i=0; i<B->rmap->n/bs; i++) {
3035       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3036     }
3037   }
3038 
3039   b = (Mat_MPIBAIJ*)B->data;
3040   b->bs2 = bs*bs;
3041   b->mbs = B->rmap->n/bs;
3042   b->nbs = B->cmap->n/bs;
3043   b->Mbs = B->rmap->N/bs;
3044   b->Nbs = B->cmap->N/bs;
3045 
3046   for (i=0; i<=b->size; i++) {
3047     b->rangebs[i] = B->rmap->range[i]/bs;
3048   }
3049   b->rstartbs = B->rmap->rstart/bs;
3050   b->rendbs   = B->rmap->rend/bs;
3051   b->cstartbs = B->cmap->rstart/bs;
3052   b->cendbs   = B->cmap->rend/bs;
3053 
3054   if (!B->preallocated) {
3055     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3056     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3057     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3058     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3059     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3060     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3061     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3062     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3063     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3064   }
3065 
3066   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3067   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3068   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3069   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3070   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3071   B->preallocated = PETSC_TRUE;
3072   PetscFunctionReturn(0);
3073 }
3074 EXTERN_C_END
3075 
3076 EXTERN_C_BEGIN
3077 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3078 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3079 EXTERN_C_END
3080 
3081 
3082 EXTERN_C_BEGIN
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3085 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3086 {
3087   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3088   PetscErrorCode ierr;
3089   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3090   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3091   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3092 
3093   PetscFunctionBegin;
3094   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3095   ii[0] = 0;
3096   CHKMEMQ;
3097   for (i=0; i<M; i++) {
3098     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3099     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3100     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3101     /* remove one from the count if the row has a diagonal entry */
3102     for (j=id[i]; j<id[i+1]; j++) {
3103       if (jd[j] == i) {ii[i+1]--;break;}
3104     }
3105   CHKMEMQ;
3106   }
3107   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3108   cnt = 0;
3109   for (i=0; i<M; i++) {
3110     for (j=io[i]; j<io[i+1]; j++) {
3111       if (garray[jo[j]] > rstart) break;
3112       jj[cnt++] = garray[jo[j]];
3113   CHKMEMQ;
3114     }
3115     for (k=id[i]; k<id[i+1]; k++) {
3116       if (jd[k] != i) {
3117         jj[cnt++] = rstart + jd[k];
3118   CHKMEMQ;
3119       }
3120     }
3121     for (;j<io[i+1]; j++) {
3122       jj[cnt++] = garray[jo[j]];
3123   CHKMEMQ;
3124     }
3125   }
3126   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3127   PetscFunctionReturn(0);
3128 }
3129 EXTERN_C_END
3130 
3131 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3132 EXTERN_C_BEGIN
3133 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3134 EXTERN_C_END
3135 
3136 EXTERN_C_BEGIN
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3139 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3140 {
3141   PetscErrorCode ierr;
3142   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3143   Mat            B;
3144   Mat_MPIAIJ     *b;
3145 
3146   PetscFunctionBegin;
3147   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3148 
3149   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3150   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3151   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3152   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3153   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3154   b = (Mat_MPIAIJ*) B->data;
3155 
3156   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3157   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3158   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3159   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3160   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3161   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   if (reuse == MAT_REUSE_MATRIX) {
3166     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3167   } else {
3168    *newmat = B;
3169   }
3170   PetscFunctionReturn(0);
3171 }
3172 EXTERN_C_END
3173 
3174 EXTERN_C_BEGIN
3175 #if defined(PETSC_HAVE_MUMPS)
3176 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3177 #endif
3178 EXTERN_C_END
3179 
3180 /*MC
3181    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3182 
3183    Options Database Keys:
3184 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3185 . -mat_block_size <bs> - set the blocksize used to store the matrix
3186 - -mat_use_hash_table <fact> - use a hash table (with expansion factor <fact>) when assembling the matrix
3187 
3188   Level: beginner
3189 
3190 .seealso: MatCreateMPIBAIJ
3191 M*/
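
/*
   A short creation sketch for MATMPIBAIJ (illustrative; the sizes and preallocation numbers
   are assumptions, and error handling is abbreviated):

     Mat      A;
     PetscInt M = 128,N = 128,bs = 4,d_nz = 5,o_nz = 2;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions(A) after MatSetSizes() and pick the type at run time
   with -mat_type mpibaij.
*/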
3192 
3193 EXTERN_C_BEGIN
3194 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3195 EXTERN_C_END
3196 
3197 EXTERN_C_BEGIN
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatCreate_MPIBAIJ"
3200 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3201 {
3202   Mat_MPIBAIJ    *b;
3203   PetscErrorCode ierr;
3204   PetscBool      flg;
3205 
3206   PetscFunctionBegin;
3207   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3208   B->data = (void*)b;
3209 
3210   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3211   B->assembled  = PETSC_FALSE;
3212 
3213   B->insertmode = NOT_SET_VALUES;
3214   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3215   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3216 
3217   /* build local table of row and column ownerships */
3218   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3219 
3220   /* build cache for off array entries formed */
3221   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3222   b->donotstash  = PETSC_FALSE;
3223   b->colmap      = PETSC_NULL;
3224   b->garray      = PETSC_NULL;
3225   b->roworiented = PETSC_TRUE;
3226 
3227   /* stuff used in block assembly */
3228   b->barray       = 0;
3229 
3230   /* stuff used for matrix vector multiply */
3231   b->lvec         = 0;
3232   b->Mvctx        = 0;
3233 
3234   /* stuff for MatGetRow() */
3235   b->rowindices   = 0;
3236   b->rowvalues    = 0;
3237   b->getrowactive = PETSC_FALSE;
3238 
3239   /* hash table stuff */
3240   b->ht           = 0;
3241   b->hd           = 0;
3242   b->ht_size      = 0;
3243   b->ht_flag      = PETSC_FALSE;
3244   b->ht_fact      = 0;
3245   b->ht_total_ct  = 0;
3246   b->ht_insert_ct = 0;
3247 
3248   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3249   b->ijonly       = PETSC_FALSE;
3250 
3251   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3252     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3253     if (flg) {
3254       PetscReal fact = 1.39;
3255       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3256       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3257       if (fact <= 1.0) fact = 1.39;
3258       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3259       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3260     }
3261   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3262 
3263 #if defined(PETSC_HAVE_MUMPS)
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3265 #endif
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3267                                      "MatConvert_MPIBAIJ_MPIAdj",
3268                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3270                                      "MatConvert_MPIBAIJ_MPIAIJ",
3271                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3273                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3274                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3276                                      "MatStoreValues_MPIBAIJ",
3277                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3279                                      "MatRetrieveValues_MPIBAIJ",
3280                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3282                                      "MatGetDiagonalBlock_MPIBAIJ",
3283                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3285                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3286                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3288                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3289                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3291                                      "MatDiagonalScaleLocal_MPIBAIJ",
3292                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3294                                      "MatSetHashTableFactor_MPIBAIJ",
3295                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3297                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3298                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3299   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3300   PetscFunctionReturn(0);
3301 }
3302 EXTERN_C_END
3303 
3304 /*MC
3305    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3306 
3307    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3308    and MATMPIBAIJ otherwise.
3309 
3310    Options Database Keys:
3311 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3312 
3313   Level: beginner
3314 
3315 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3316 M*/
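
/*
   Typical use of MATBAIJ (an illustrative sketch; local sizes and preallocation numbers are
   assumptions). Both preallocation routines are called; the one that does not match the type
   the communicator resolves to is silently ignored.

     Mat      A;
     PetscInt bs = 3,m = 12,n = 12,d_nz = 5,o_nz = 2;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
     ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/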
3317 
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3320 /*@C
3321    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3322    (block compressed row).  For good matrix assembly performance
3323    the user should preallocate the matrix storage by setting the parameters
3324    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3325    performance can be increased by more than a factor of 50.
3326 
3327    Collective on Mat
3328 
3329    Input Parameters:
3330 +  A - the matrix
3331 .  bs   - size of block
3332 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3333            submatrix  (same for all local rows)
3334 .  d_nnz - array containing the number of block nonzeros in the various block rows
3335            in the diagonal portion of the local submatrix (possibly different for each block
3336            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3337            set it even if it is zero.
3338 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3339            submatrix (same for all local rows).
3340 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3341            off-diagonal portion of the local submatrix (possibly different for
3342            each block row) or PETSC_NULL.
3343 
3344    If the *_nnz parameter is given then the *_nz parameter is ignored
3345 
3346    Options Database Keys:
3347 +   -mat_block_size - size of the blocks to use
3348 -   -mat_use_hash_table <fact> - use a hash table (with expansion factor <fact>) when assembling the matrix
3349 
3350    Notes:
3351    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3352    then it must be used on all processors that share the object for that argument.
3353 
3354    Storage Information:
3355    For a square global matrix we define each processor's diagonal portion
3356    to be its local rows and the corresponding columns (a square submatrix);
3357    each processor's off-diagonal portion encompasses the remainder of the
3358    local matrix (a rectangular submatrix).
3359 
3360    The user can specify preallocated storage for the diagonal part of
3361    the local submatrix with either d_nz or d_nnz (not both).  Set
3362    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3363    memory allocation.  Likewise, specify preallocated storage for the
3364    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3365 
3366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3367    the figure below we depict these three local rows and all columns (0-11).
3368 
3369 .vb
3370            0 1 2 3 4 5 6 7 8 9 10 11
3371           -------------------
3372    row 3  |  o o o d d d o o o o o o
3373    row 4  |  o o o d d d o o o o o o
3374    row 5  |  o o o d d d o o o o o o
3375           -------------------
3376 .ve
3377 
3378    Thus, any entries in the d locations are stored in the d (diagonal)
3379    submatrix, and any entries in the o locations are stored in the
3380    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3381    stored simply in the MATSEQBAIJ format for compressed row storage.
3382 
3383    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3384    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3385    In general, for PDE problems in which most nonzeros are near the diagonal,
3386    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3387    or you will get TERRIBLE performance; see the users' manual chapter on
3388    matrices.
3389 
3390    You can call MatGetInfo() to get information on how effective the preallocation was;
3391    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3392    You can also run with the option -info and look for messages with the string
3393    malloc in them to see if additional memory allocation was needed.
3394 
3395    Level: intermediate
3396 
3397 .keywords: matrix, block, aij, compressed row, sparse, parallel
3398 
3399 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3400 @*/
3401 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3402 {
3403   PetscErrorCode ierr;
3404 
3405   PetscFunctionBegin;
3406   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3407   PetscValidType(B,1);
3408   PetscValidLogicalCollectiveInt(B,bs,2);
3409   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
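
/*
   A brief sketch of per-block-row preallocation with MatMPIBAIJSetPreallocation() (the block
   size and the d_nnz/o_nnz values are illustrative assumptions; error handling is abbreviated):

     Mat      A;
     PetscInt bs = 2,mbs = 3;               three local block rows
     PetscInt d_nnz[3] = {3,2,3};           block nonzeros per block row, diagonal part
     PetscInt o_nnz[3] = {1,0,2};           block nonzeros per block row, off-diagonal part

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,bs*mbs,bs*mbs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);   d_nz/o_nz are ignored when the arrays are given
*/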
3412 
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatCreateBAIJ"
3415 /*@C
3416    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3417    (block compressed row).  For good matrix assembly performance
3418    the user should preallocate the matrix storage by setting the parameters
3419    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3420    performance can be increased by more than a factor of 50.
3421 
3422    Collective on MPI_Comm
3423 
3424    Input Parameters:
3425 +  comm - MPI communicator
3426 .  bs   - size of block
3427 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3428            This value should be the same as the local size used in creating the
3429            y vector for the matrix-vector product y = Ax.
3430 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3431            This value should be the same as the local size used in creating the
3432            x vector for the matrix-vector product y = Ax.
3433 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3434 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3435 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3436            submatrix  (same for all local rows)
3437 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3438            in the diagonal portion of the local submatrix (possibly different for each block
3439            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3440            and set it even if it is zero.
3441 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3442            submatrix (same for all local rows).
3443 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3444            off-diagonal portion of the local submatrix (possibly different for
3445            each block row) or PETSC_NULL.
3446 
3447    Output Parameter:
3448 .  A - the matrix
3449 
3450    Options Database Keys:
3451 +   -mat_block_size - size of the blocks to use
3452 -   -mat_use_hash_table <fact> - use a hash table (with expansion factor <fact>) when assembling the matrix
3453 
3454    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3456    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3457 
3458    Notes:
3459    If the *_nnz parameter is given then the *_nz parameter is ignored
3460 
3461    A nonzero block is any block that has 1 or more nonzeros in it
3462 
3463    The user MUST specify either the local or global matrix dimensions
3464    (possibly both).
3465 
3466    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3467    then it must be used on all processors that share the object for that argument.
3468 
3469    Storage Information:
3470    For a square global matrix we define each processor's diagonal portion
3471    to be its local rows and the corresponding columns (a square submatrix);
3472    each processor's off-diagonal portion encompasses the remainder of the
3473    local matrix (a rectangular submatrix).
3474 
3475    The user can specify preallocated storage for the diagonal part of
3476    the local submatrix with either d_nz or d_nnz (not both).  Set
3477    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3478    memory allocation.  Likewise, specify preallocated storage for the
3479    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3480 
3481    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3482    the figure below we depict these three local rows and all columns (0-11).
3483 
3484 .vb
3485            0 1 2 3 4 5 6 7 8 9 10 11
3486           -------------------
3487    row 3  |  o o o d d d o o o o o o
3488    row 4  |  o o o d d d o o o o o o
3489    row 5  |  o o o d d d o o o o o o
3490           -------------------
3491 .ve
3492 
3493    Thus, any entries in the d locations are stored in the d (diagonal)
3494    submatrix, and any entries in the o locations are stored in the
3495    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3496    stored simply in the MATSEQBAIJ format for compressed row storage.
3497 
3498    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3499    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3500    In general, for PDE problems in which most nonzeros are near the diagonal,
3501    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3502    or you will get TERRIBLE performance; see the users' manual chapter on
3503    matrices.
3504 
3505    Level: intermediate
3506 
3507 .keywords: matrix, block, aij, compressed row, sparse, parallel
3508 
3509 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3510 @*/
3511 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3512 {
3513   PetscErrorCode ierr;
3514   PetscMPIInt    size;
3515 
3516   PetscFunctionBegin;
3517   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3518   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3520   if (size > 1) {
3521     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3522     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3523   } else {
3524     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3525     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3526   }
3527   PetscFunctionReturn(0);
3528 }
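
/*
   A brief sketch of calling MatCreateBAIJ() directly (illustrative sizes; error handling
   abbreviated). As noted above, the MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   sequence is the recommended alternative.

     Mat      A;
     PetscInt bs = 2,m = 8,n = 8;           local sizes in points; must be divisible by bs

     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                          5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/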
3529 
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3532 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3533 {
3534   Mat            mat;
3535   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3536   PetscErrorCode ierr;
3537   PetscInt       len=0;
3538 
3539   PetscFunctionBegin;
3540   *newmat       = 0;
3541   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3542   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3543   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3544   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3545 
3546   mat->factortype   = matin->factortype;
3547   mat->preallocated = PETSC_TRUE;
3548   mat->assembled    = PETSC_TRUE;
3549   mat->insertmode   = NOT_SET_VALUES;
3550 
3551   a      = (Mat_MPIBAIJ*)mat->data;
3552   mat->rmap->bs  = matin->rmap->bs;
3553   a->bs2   = oldmat->bs2;
3554   a->mbs   = oldmat->mbs;
3555   a->nbs   = oldmat->nbs;
3556   a->Mbs   = oldmat->Mbs;
3557   a->Nbs   = oldmat->Nbs;
3558 
3559   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3560   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3561 
3562   a->size         = oldmat->size;
3563   a->rank         = oldmat->rank;
3564   a->donotstash   = oldmat->donotstash;
3565   a->roworiented  = oldmat->roworiented;
3566   a->rowindices   = 0;
3567   a->rowvalues    = 0;
3568   a->getrowactive = PETSC_FALSE;
3569   a->barray       = 0;
3570   a->rstartbs     = oldmat->rstartbs;
3571   a->rendbs       = oldmat->rendbs;
3572   a->cstartbs     = oldmat->cstartbs;
3573   a->cendbs       = oldmat->cendbs;
3574 
3575   /* hash table stuff */
3576   a->ht           = 0;
3577   a->hd           = 0;
3578   a->ht_size      = 0;
3579   a->ht_flag      = oldmat->ht_flag;
3580   a->ht_fact      = oldmat->ht_fact;
3581   a->ht_total_ct  = 0;
3582   a->ht_insert_ct = 0;
3583 
3584   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3585   if (oldmat->colmap) {
3586 #if defined (PETSC_USE_CTABLE)
3587   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3588 #else
3589   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3590   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3591   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3592 #endif
3593   } else a->colmap = 0;
3594 
3595   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3596     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3597     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3598     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3599   } else a->garray = 0;
3600 
3601   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3602   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3603   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3604   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3605   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3606 
3607   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3608   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3609   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3610   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3611   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3612   *newmat = mat;
3613 
3614   PetscFunctionReturn(0);
3615 }
3616 
3617 #undef __FUNCT__
3618 #define __FUNCT__ "MatLoad_MPIBAIJ"
3619 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3620 {
3621   PetscErrorCode ierr;
3622   int            fd;
3623   PetscInt       i,nz,j,rstart,rend;
3624   PetscScalar    *vals,*buf;
3625   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3626   MPI_Status     status;
3627   PetscMPIInt    rank,size,maxnz;
3628   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3629   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3630   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3631   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3632   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3633   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3634 
3635   PetscFunctionBegin;
3636   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3637     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3638   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3639 
3640   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3641   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3642   if (!rank) {
3643     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3644     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3645     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3646   }
3647 
3648   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3649 
3650   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3651   M = header[1]; N = header[2];
3652 
3653   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3654   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3655   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3656 
3657   /* If global sizes are set, check if they are consistent with that given in the file */
3658   if (sizesset) {
3659     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3660   }
3661   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3662   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3663 
3664   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3665 
3666   /*
3667      This code adds extra rows to make sure the number of rows is
3668      divisible by the blocksize
3669   */
3670   Mbs        = M/bs;
3671   extra_rows = bs - M + bs*Mbs;
3672   if (extra_rows == bs) extra_rows = 0;
3673   else                  Mbs++;
3674   if (extra_rows && !rank) {
3675     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3676   }
3677 
3678   /* determine ownership of all rows */
3679   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3680     mbs        = Mbs/size + ((Mbs % size) > rank);
3681     m          = mbs*bs;
3682   } else { /* User set */
3683     m          = newmat->rmap->n;
3684     mbs        = m/bs;
3685   }
3686   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3687   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3688 
3689   /* process 0 needs enough room for process with most rows */
3690   if (!rank) {
3691     mmax = rowners[1];
3692     for (i=2; i<=size; i++) {
3693       mmax = PetscMax(mmax,rowners[i]);
3694     }
3695     mmax*=bs;
3696   } else mmax = m;
3697 
3698   rowners[0] = 0;
3699   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3700   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3701   rstart = rowners[rank];
3702   rend   = rowners[rank+1];
3703 
3704   /* distribute row lengths to all processors */
3705   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3706   if (!rank) {
3707     mend = m;
3708     if (size == 1) mend = mend - extra_rows;
3709     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3710     for (j=mend; j<m; j++) locrowlens[j] = 1;
3711     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3712     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3713     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3714     for (j=0; j<m; j++) {
3715       procsnz[0] += locrowlens[j];
3716     }
3717     for (i=1; i<size; i++) {
3718       mend = browners[i+1] - browners[i];
3719       if (i == size-1) mend = mend - extra_rows;
3720       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3721       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3722       /* calculate the number of nonzeros on each processor */
3723       for (j=0; j<browners[i+1]-browners[i]; j++) {
3724         procsnz[i] += rowlengths[j];
3725       }
3726       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3727     }
3728     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3729   } else {
3730     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3731   }
3732 
3733   if (!rank) {
3734     /* determine max buffer needed and allocate it */
3735     maxnz = procsnz[0];
3736     for (i=1; i<size; i++) {
3737       maxnz = PetscMax(maxnz,procsnz[i]);
3738     }
3739     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3740 
3741     /* read in my part of the matrix column indices  */
3742     nz     = procsnz[0];
3743     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3744     mycols = ibuf;
3745     if (size == 1)  nz -= extra_rows;
3746     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3747     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3748 
3749     /* read in the parts for all other processes (except the last) and ship them off */
3750     for (i=1; i<size-1; i++) {
3751       nz   = procsnz[i];
3752       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3753       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3754     }
3755     /* read in the stuff for the last proc */
3756     if (size != 1) {
3757       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3758       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3759       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3760       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3761     }
3762     ierr = PetscFree(cols);CHKERRQ(ierr);
3763   } else {
3764     /* determine buffer space needed for message */
3765     nz = 0;
3766     for (i=0; i<m; i++) {
3767       nz += locrowlens[i];
3768     }
3769     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3770     mycols = ibuf;
3771     /* receive message of column indices*/
3772     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3773     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3774     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3775   }
3776 
3777   /* loop over local rows, determining number of off diagonal entries */
3778   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3779   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3780   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3781   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3782   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3783   rowcount = 0; nzcount = 0;
3784   for (i=0; i<mbs; i++) {
3785     dcount  = 0;
3786     odcount = 0;
3787     for (j=0; j<bs; j++) {
3788       kmax = locrowlens[rowcount];
3789       for (k=0; k<kmax; k++) {
3790         tmp = mycols[nzcount++]/bs;
3791         if (!mask[tmp]) {
3792           mask[tmp] = 1;
3793           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3794           else masked1[dcount++] = tmp;
3795         }
3796       }
3797       rowcount++;
3798     }
3799 
3800     dlens[i]  = dcount;
3801     odlens[i] = odcount;
3802 
3803     /* zero out the mask elements we set */
3804     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3805     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3806   }
3807 
3808 
3809   if (!sizesset) {
3810     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3811   }
3812   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3813 
3814   if (!rank) {
3815     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3816     /* read in my part of the matrix numerical values  */
3817     nz = procsnz[0];
3818     vals = buf;
3819     mycols = ibuf;
3820     if (size == 1)  nz -= extra_rows;
3821     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3822     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3823 
3824     /* insert into matrix */
3825     jj      = rstart*bs;
3826     for (i=0; i<m; i++) {
3827       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3828       mycols += locrowlens[i];
3829       vals   += locrowlens[i];
3830       jj++;
3831     }
3832     /* read in other processors (except the last one) and ship out */
3833     for (i=1; i<size-1; i++) {
3834       nz   = procsnz[i];
3835       vals = buf;
3836       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3837       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3838     }
3839     /* the last proc */
3840     if (size != 1) {
3841       nz   = procsnz[i] - extra_rows;
3842       vals = buf;
3843       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3844       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3845       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3846     }
3847     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3848   } else {
3849     /* receive numeric values */
3850     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3851 
3852     /* receive message of values*/
3853     vals   = buf;
3854     mycols = ibuf;
3855     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3856 
3857     /* insert into matrix */
3858     jj      = rstart*bs;
3859     for (i=0; i<m; i++) {
3860       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3861       mycols += locrowlens[i];
3862       vals   += locrowlens[i];
3863       jj++;
3864     }
3865   }
3866   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3867   ierr = PetscFree(buf);CHKERRQ(ierr);
3868   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3869   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3870   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3871   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3872   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3873   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3874 
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 #undef __FUNCT__
3879 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3880 /*@
3881    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3882 
3883    Input Parameters:
3884 +  mat  - the matrix
3885 -  fact - factor
3886 
3887    Not Collective, each process can use a different factor
3888 
3889    Level: advanced
3890 
3891   Notes:
3892    This can also be set by the command line option: -mat_use_hash_table <fact>
3893 
3894 .keywords: matrix, hashtable, factor, HT
3895 
3896 .seealso: MatSetOption()
3897 @*/
3898 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3899 {
3900   PetscErrorCode ierr;
3901 
3902   PetscFunctionBegin;
3903   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
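
/*
   A short sketch of enabling the hash table and setting its factor from code (this mirrors
   what the -mat_use_hash_table option does in MatCreate_MPIBAIJ() above; the value 1.6 is an
   illustrative choice):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/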
3906 
3907 EXTERN_C_BEGIN
3908 #undef __FUNCT__
3909 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3910 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3911 {
3912   Mat_MPIBAIJ *baij;
3913 
3914   PetscFunctionBegin;
3915   baij = (Mat_MPIBAIJ*)mat->data;
3916   baij->ht_fact = fact;
3917   PetscFunctionReturn(0);
3918 }
3919 EXTERN_C_END
3920 
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3923 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3924 {
3925   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3926   PetscFunctionBegin;
3927   *Ad     = a->A;
3928   *Ao     = a->B;
3929   *colmap = a->garray;
3930   PetscFunctionReturn(0);
3931 }
3932 
3933 /*
3934     Special version for direct calls from Fortran (to eliminate two function call overheads
3935 */
3936 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3937 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3938 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3939 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3940 #endif
3941 
3942 #undef __FUNCT__
3943 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3944 /*@C
3945   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3946 
3947   Collective on Mat
3948 
3949   Input Parameters:
3950 + mat - the matrix
3951 . min - number of input rows
3952 . im - input rows
3953 . nin - number of input columns
3954 . in - input columns
3955 . v - numerical values input
3956 - addvin - INSERT_VALUES or ADD_VALUES
3957 
3958   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3959 
3960   Level: advanced
3961 
3962 .seealso:   MatSetValuesBlocked()
3963 @*/
3964 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3965 {
3966   /* convert input arguments to C version */
3967   Mat             mat = *matin;
3968   PetscInt        m = *min, n = *nin;
3969   InsertMode      addv = *addvin;
3970 
3971   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3972   const MatScalar *value;
3973   MatScalar       *barray=baij->barray;
3974   PetscBool       roworiented = baij->roworiented;
3975   PetscErrorCode  ierr;
3976   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3977   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3978   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3979 
3980   PetscFunctionBegin;
3981   /* tasks normally handled by MatSetValuesBlocked() */
3982   if (mat->insertmode == NOT_SET_VALUES) {
3983     mat->insertmode = addv;
3984   }
3985 #if defined(PETSC_USE_DEBUG)
3986   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3987   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3988 #endif
3989   if (mat->assembled) {
3990     mat->was_assembled = PETSC_TRUE;
3991     mat->assembled     = PETSC_FALSE;
3992   }
3993   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3994 
3995 
3996   if (!barray) {
3997     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3998     baij->barray = barray;
3999   }
4000 
4001   if (roworiented) {
4002     stepval = (n-1)*bs;
4003   } else {
4004     stepval = (m-1)*bs;
4005   }
4006   for (i=0; i<m; i++) {
4007     if (im[i] < 0) continue;
4008 #if defined(PETSC_USE_DEBUG)
4009     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4010 #endif
4011     if (im[i] >= rstart && im[i] < rend) {
4012       row = im[i] - rstart;
4013       for (j=0; j<n; j++) {
4014         /* If there is only one block along the minor direction, the input block is already contiguous and no copy is required */
4015         if ((roworiented) && (n == 1)) {
4016           barray = (MatScalar*)v + i*bs2;
4017         } else if ((!roworiented) && (m == 1)) {
4018           barray = (MatScalar*)v + j*bs2;
4019         } else { /* Here a copy is required */
4020           if (roworiented) {
4021             value = v + i*(stepval+bs)*bs + j*bs;
4022           } else {
4023             value = v + j*(stepval+bs)*bs + i*bs;
4024           }
4025           for (ii=0; ii<bs; ii++,value+=stepval) {
4026             for (jj=0; jj<bs; jj++) {
4027               *barray++  = *value++;
4028             }
4029           }
4030           barray -=bs2;
4031         }
4032 
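        /* dispatch the block to the diagonal part (baij->A) or the off-diagonal part (baij->B),
           translating the global block column to a local block column where necessary */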
4033         if (in[j] >= cstart && in[j] < cend) {
4034           col  = in[j] - cstart;
4035           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4036         }
4037         else if (in[j] < 0) continue;
4038 #if defined(PETSC_USE_DEBUG)
4039         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4040 #endif
4041         else {
4042           if (mat->was_assembled) {
4043             if (!baij->colmap) {
4044               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4045             }
4046 
4047 #if defined(PETSC_USE_DEBUG)
4048 #if defined (PETSC_USE_CTABLE)
4049             { PetscInt data;
4050               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4051               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4052             }
4053 #else
4054             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4055 #endif
4056 #endif
4057 #if defined (PETSC_USE_CTABLE)
4058             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4059             col  = (col - 1)/bs;
4060 #else
4061             col = (baij->colmap[in[j]] - 1)/bs;
4062 #endif
4063             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4064               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4065               col =  in[j];
4066             }
4067           }
4068           else col = in[j];
4069           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4070         }
4071       }
4072     } else {
4073       if (!baij->donotstash) {
4074         if (roworiented) {
4075           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4076         } else {
4077           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4078         }
4079       }
4080     }
4081   }
4082 
4083   /* task normally handled by MatSetValuesBlocked() */
4084   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4085   PetscFunctionReturn(0);
4086 }
4087 
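/*
   Usage sketch (A, row, col, and vals are hypothetical names): from C the same blocked
   insertion is done through the public interface, e.g. for one bs x bs block with global
   block row `row`, global block column `col`, and bs*bs values in vals[]:

       ierr = MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);

   The Fortran-specific routine above exists only to eliminate the extra function-call
   overhead of going through MatSetValuesBlocked().
*/
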
4088 #undef __FUNCT__
4089 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4090 /*@
4091      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain
4092          the local rows in standard CSR format.
4093 
4094    Collective on MPI_Comm
4095 
4096    Input Parameters:
4097 +  comm - MPI communicator
4098 .  bs - the block size, only a block size of 1 is supported
4099 .  m - number of local rows (Cannot be PETSC_DECIDE)
4100 .  n - This value should be the same as the local size used in creating the
4101        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4102        calculated if N is given). For square matrices n is almost always m.
4103 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4104 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4105 .   i - row indices
4106 .   j - column indices
4107 -   a - matrix values
4108 
4109    Output Parameter:
4110 .   mat - the matrix
4111 
4112    Level: intermediate
4113 
4114    Notes:
4115        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4116      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4117      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4118 
4119        The i and j indices are 0 based, and the entries of i are offsets (row starts) into the local j and a arrays.
4120 
4121 .keywords: matrix, baij, compressed row, sparse, parallel
4122 
4123 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValuesBlocked(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4124           MPIBAIJ, MatCreateBAIJ()
4125 @*/
4126 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4127 {
4128   PetscErrorCode ierr;
4129 
4130 
4131   PetscFunctionBegin;
4132   if (i[0]) {
4133     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4134   }
4135   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4136   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4137   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4138   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4139   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4140   PetscFunctionReturn(0);
4141 }
4142
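/*
   Usage sketch for MatCreateMPIBAIJWithArrays() above (ii, jj, aa, and A are hypothetical
   names): with bs = 1, a process that owns the two local rows [1 0 2] and [0 3 0] of a
   matrix with 3 global columns would pass

       PetscInt    ii[] = {0,2,3};          row offsets; ii[0] must be 0
       PetscInt    jj[] = {0,2,1};          global column indices
       PetscScalar aa[] = {1.0,2.0,3.0};
       ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);

   On more than one process each rank passes only its own rows and its local column size.
*/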