xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2dc3f860d4b6dfd94ab9540c22e71d62bbcd5121)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the number of global (block) columns of the matrix.
82 */
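/*
   Illustrative sketch (mirrors the use in MatSetValues_MPIBAIJ() below): a global
   column gcol is translated into a local off-diagonal column roughly as

     col = baij->colmap[gcol/bs] - 1;       (with PETSC_USE_CTABLE, PetscTableFind()
                                              with key gcol/bs+1 plays the same role)
     if (col < 0)   the block column is not stored on this process
     else           col += gcol % bs;

   The stored offsets are shifted by one so that a zero entry can mean "not present".
*/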
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
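/*
   The two macros below insert a single scalar value at (row,col) into the local
   diagonal block A (..._A_Private) or the off-diagonal block B (..._B_Private).
   They locate the block column with a short bisection followed by a linear scan,
   add or overwrite the entry inside the bs x bs block if it exists, and otherwise
   insert a new block, shifting the later blocks of the row up and reallocating
   (via MatSeqXAIJReallocateAIJ()) when needed; the nonew option can suppress or
   forbid the insertion of new nonzeros.
*/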
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
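  /* each (im[i],in[j]) entry is routed to the diagonal block A, to the off-diagonal
     block B (after translating the global column through colmap), or, for rows owned
     by another process, into the stash for communication at assembly time */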
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
244               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
252             else col += in[j]%bs;
253           } else col = in[j];
254           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
255           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
256           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
257         }
258       }
259     } else {
260       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
261       if (!baij->donotstash) {
262         mat->assembled = PETSC_FALSE;
263         if (roworiented) {
264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
265         } else {
266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
267         }
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277 {
278   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279   const PetscScalar *value;
280   MatScalar         *barray=baij->barray;
281   PetscBool         roworiented = baij->roworiented;
282   PetscErrorCode    ierr;
283   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286 
287   PetscFunctionBegin;
288   if (!barray) {
289     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
290     baij->barray = barray;
291   }
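  /* barray is a bs*bs scratch block; when the caller's block is not already contiguous
     in v it is repacked into barray before being handed to MatSetValuesBlocked_SeqBAIJ() */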
292 
293   if (roworiented) {
294     stepval = (n-1)*bs;
295   } else {
296     stepval = (m-1)*bs;
297   }
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If there is only one block column (row-oriented) or one block row (column-oriented), the block is already contiguous in v and no copy is required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315 	    value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) {
319               barray[jj]  = value[jj];
320             }
321             barray += bs;
322           }
323           barray -= bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
357               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
367       if (!baij->donotstash) {
368         if (roworiented) {
369           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         } else {
371           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
372         }
373       }
374     }
375   }
376   PetscFunctionReturn(0);
377 }
378 
379 #define HASH_KEY 0.6180339887
380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
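/*
   HASH() implements multiplicative hashing: the key is scaled by the golden-ratio
   constant HASH_KEY, the fractional part of the product (held in tmp) is kept, and
   scaling that fraction by the table size gives the starting slot.  The key itself
   is (block row)*Nbs + (block column) + 1, so that a zero table entry can mean
   "empty".  Collisions are resolved by the linear probing in the routines below.
*/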
383 #undef __FUNCT__
384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386 {
387   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
388   PetscBool      roworiented = baij->roworiented;
389   PetscErrorCode ierr;
390   PetscInt       i,j,row,col;
391   PetscInt       rstart_orig=mat->rmap->rstart;
392   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
393   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
394   PetscReal      tmp;
395   MatScalar      **HD = baij->hd,value;
396 #if defined(PETSC_USE_DEBUG)
397   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
398 #endif
399 
400   PetscFunctionBegin;
401   if (v) PetscValidScalarPointer(v,6);
402   for (i=0; i<m; i++) {
403 #if defined(PETSC_USE_DEBUG)
404     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
405     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
406 #endif
407     row = im[i];
408     if (row >= rstart_orig && row < rend_orig) {
409       for (j=0; j<n; j++) {
410         col = in[j];
411         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412         /* Look up into the Hash Table */
413         key = (row/bs)*Nbs+(col/bs)+1;
414         h1  = HASH(size,key,tmp);
415 
416 
417         idx = h1;
418 #if defined(PETSC_USE_DEBUG)
419         insert_ct++;
420         total_ct++;
421         if (HT[idx] != key) {
422           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
423           if (idx == size) {
424             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
425             if (idx == h1) {
426               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
427             }
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) {
436               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
437             }
438           }
439         }
440 #endif
441         /* A HASH table entry is found, so insert the values at the correct address */
442         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
443         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
444       }
445     } else {
446       if (!baij->donotstash) {
447         if (roworiented) {
448           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
449         } else {
450           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
451         }
452       }
453     }
454   }
455 #if defined(PETSC_USE_DEBUG)
456   baij->ht_total_ct = total_ct;
457   baij->ht_insert_ct = insert_ct;
458 #endif
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
467   PetscBool         roworiented = baij->roworiented;
468   PetscErrorCode    ierr;
469   PetscInt          i,j,ii,jj,row,col;
470   PetscInt          rstart=baij->rstartbs;
471   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
472   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
473   PetscReal         tmp;
474   MatScalar         **HD = baij->hd,*baij_a;
475   const PetscScalar *v_t,*value;
476 #if defined(PETSC_USE_DEBUG)
477   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
478 #endif
479 
480   PetscFunctionBegin;
481 
482   if (roworiented) {
483     stepval = (n-1)*bs;
484   } else {
485     stepval = (m-1)*bs;
486   }
487   for (i=0; i<m; i++) {
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
490     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
491 #endif
492     row   = im[i];
493     v_t   = v + i*nbs2;
494     if (row >= rstart && row < rend) {
495       for (j=0; j<n; j++) {
496         col = in[j];
497 
498         /* Look up into the Hash Table */
499         key = row*Nbs+col+1;
500         h1  = HASH(size,key,tmp);
501 
502         idx = h1;
503 #if defined(PETSC_USE_DEBUG)
504         total_ct++;
505         insert_ct++;
506         if (HT[idx] != key) {
507           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
508           if (idx == size) {
509             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
510             if (idx == h1) {
511               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512             }
513           }
514         }
515 #else
516         if (HT[idx] != key) {
517           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
518           if (idx == size) {
519             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
520             if (idx == h1) {
521               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
522             }
523           }
524         }
525 #endif
526         baij_a = HD[idx];
527         if (roworiented) {
528           /*value = v + i*(stepval+bs)*bs + j*bs;*/
529           /* value = v + (i*(stepval+bs)+j)*bs; */
530           value = v_t;
531           v_t  += bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval) {
534               for (jj=ii; jj<bs2; jj+=bs) {
535                 baij_a[jj]  += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval) {
540               for (jj=ii; jj<bs2; jj+=bs) {
541                 baij_a[jj]  = *value++;
542               }
543             }
544           }
545         } else {
546           value = v + j*(stepval+bs)*bs + i*bs;
547           if (addv == ADD_VALUES) {
548             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
549               for (jj=0; jj<bs; jj++) {
550                 baij_a[jj]  += *value++;
551               }
552             }
553           } else {
554             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
555               for (jj=0; jj<bs; jj++) {
556                 baij_a[jj]  = *value++;
557               }
558             }
559           }
560         }
561       }
562     } else {
563       if (!baij->donotstash) {
564         if (roworiented) {
565           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         } else {
567           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
568         }
569       }
570     }
571   }
572 #if defined(PETSC_USE_DEBUG)
573   baij->ht_total_ct = total_ct;
574   baij->ht_insert_ct = insert_ct;
575 #endif
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatGetValues_MPIBAIJ"
581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
582 {
583   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
584   PetscErrorCode ierr;
585   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
586   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
587 
588   PetscFunctionBegin;
589   for (i=0; i<m; i++) {
590     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
591     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
592     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
593       row = idxm[i] - bsrstart;
594       for (j=0; j<n; j++) {
595         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
596         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
597         if (idxn[j] >= bscstart && idxn[j] < bscend){
598           col = idxn[j] - bscstart;
599           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
600         } else {
601           if (!baij->colmap) {
602             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
603           }
604 #if defined (PETSC_USE_CTABLE)
605           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
606           data --;
607 #else
608           data = baij->colmap[idxn[j]/bs]-1;
609 #endif
610           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
611           else {
612             col  = data + idxn[j]%bs;
613             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
614           }
615         }
616       }
617     } else {
618       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
619     }
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatNorm_MPIBAIJ"
626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
627 {
628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
629   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
630   PetscErrorCode ierr;
631   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
632   PetscReal      sum = 0.0;
633   MatScalar      *v;
634 
635   PetscFunctionBegin;
636   if (baij->size == 1) {
637     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
638   } else {
639     if (type == NORM_FROBENIUS) {
640       v = amat->a;
641       nz = amat->nz*bs2;
642       for (i=0; i<nz; i++) {
643 #if defined(PETSC_USE_COMPLEX)
644         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
645 #else
646         sum += (*v)*(*v); v++;
647 #endif
648       }
649       v = bmat->a;
650       nz = bmat->nz*bs2;
651       for (i=0; i<nz; i++) {
652 #if defined(PETSC_USE_COMPLEX)
653         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
654 #else
655         sum += (*v)*(*v); v++;
656 #endif
657       }
658       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
659       *nrm = PetscSqrtReal(*nrm);
660     } else if (type == NORM_1) { /* max column sum */
661       PetscReal *tmp,*tmp2;
662       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
663       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
664       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
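      /* tmp[] accumulates this process's contribution to each global column sum
         (diagonal block columns shifted into global numbering, off-diagonal block
         columns mapped through garray); the Allreduce below combines them and the
         largest entry is the 1-norm */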
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap->N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 /*
727   Creates and fills in the hash table.
728   This table is created only once.
729   If new entries need to be added to the matrix
730   then the hash table has to be destroyed and
731   recreated.
732 */
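/*
   A rough sketch of the construction done below: the table holds factor*nz slots
   (nz counts the stored blocks of A and B together); every stored block is given
   the key (block row)*Nbs + (global block column) + 1 and placed at the first free
   slot found by linear probing from HASH(key).  HD[slot] points directly at the
   bs2 values of that block, so the _HT variants of MatSetValues can write into the
   matrix without searching the row structures.
*/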
733 #undef __FUNCT__
734 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
735 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
736 {
737   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
738   Mat            A = baij->A,B=baij->B;
739   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
740   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
741   PetscErrorCode ierr;
742   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
743   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
744   PetscInt       *HT,key;
745   MatScalar      **HD;
746   PetscReal      tmp;
747 #if defined(PETSC_USE_INFO)
748   PetscInt       ct=0,max=0;
749 #endif
750 
751   PetscFunctionBegin;
752   if (baij->ht) PetscFunctionReturn(0);
753 
754   baij->ht_size = (PetscInt)(factor*nz);
755   ht_size       = baij->ht_size;
756 
757   /* Allocate Memory for Hash Table */
758   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
759   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
761   HD   = baij->hd;
762   HT   = baij->ht;
763 
764   /* Loop Over A */
765   for (i=0; i<a->mbs; i++) {
766     for (j=ai[i]; j<ai[i+1]; j++) {
767       row = i+rstart;
768       col = aj[j]+cstart;
769 
770       key = row*Nbs + col + 1;
771       h1  = HASH(ht_size,key,tmp);
772       for (k=0; k<ht_size; k++){
773         if (!HT[(h1+k)%ht_size]) {
774           HT[(h1+k)%ht_size] = key;
775           HD[(h1+k)%ht_size] = a->a + j*bs2;
776           break;
777 #if defined(PETSC_USE_INFO)
778         } else {
779           ct++;
780 #endif
781         }
782       }
783 #if defined(PETSC_USE_INFO)
784       if (k > max) max = k;
785 #endif
786     }
787   }
788   /* Loop Over B */
789   for (i=0; i<b->mbs; i++) {
790     for (j=bi[i]; j<bi[i+1]; j++) {
791       row = i+rstart;
792       col = garray[bj[j]];
793       key = row*Nbs + col + 1;
794       h1  = HASH(ht_size,key,tmp);
795       for (k=0; k<ht_size; k++){
796         if (!HT[(h1+k)%ht_size]) {
797           HT[(h1+k)%ht_size] = key;
798           HD[(h1+k)%ht_size] = b->a + j*bs2;
799           break;
800 #if defined(PETSC_USE_INFO)
801         } else {
802           ct++;
803 #endif
804         }
805       }
806 #if defined(PETSC_USE_INFO)
807       if (k > max) max = k;
808 #endif
809     }
810   }
811 
812   /* Print Summary */
813 #if defined(PETSC_USE_INFO)
814   for (i=0,j=0; i<ht_size; i++) {
815     if (HT[i]) {j++;}
816   }
817   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
818 #endif
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
824 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
825 {
826   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
827   PetscErrorCode ierr;
828   PetscInt       nstash,reallocs;
829   InsertMode     addv;
830 
831   PetscFunctionBegin;
832   if (baij->donotstash || mat->nooffprocentries) {
833     PetscFunctionReturn(0);
834   }
835 
836   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
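  /* (INSERT_VALUES and ADD_VALUES are distinct bits of InsertMode, so the bitwise OR over
      all processes equals ADD_VALUES|INSERT_VALUES exactly when both modes were used) */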
837   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
838   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
839   mat->insertmode = addv; /* in case this processor had no cache */
840 
841   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
842   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
846   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
852 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
853 {
854   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
855   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
856   PetscErrorCode ierr;
857   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
858   PetscInt       *row,*col;
859   PetscBool      r1,r2,r3,other_disassembled;
860   MatScalar      *val;
861   InsertMode     addv = mat->insertmode;
862   PetscMPIInt    n;
863 
864   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
865   PetscFunctionBegin;
866   if (!baij->donotstash && !mat->nooffprocentries) {
867     while (1) {
868       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
869       if (!flg) break;
870 
871       for (i=0; i<n;) {
872         /* Now identify the consecutive vals belonging to the same row */
873         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
874         if (j < n) ncols = j-i;
875         else       ncols = n-i;
876         /* Now assemble all these values with a single function call */
877         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
878         i = j;
879       }
880     }
881     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
882     /* Now process the block-stash. Since the values are stashed column-oriented,
883        set the roworiented flag to column oriented, and after MatSetValues()
884        restore the original flags */
885     r1 = baij->roworiented;
886     r2 = a->roworiented;
887     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
888     baij->roworiented = PETSC_FALSE;
889     a->roworiented    = PETSC_FALSE;
890     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
891     while (1) {
892       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
893       if (!flg) break;
894 
895       for (i=0; i<n;) {
896         /* Now identify the consecutive vals belonging to the same row */
897         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
898         if (j < n) ncols = j-i;
899         else       ncols = n-i;
900         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
901         i = j;
902       }
903     }
904     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
905     baij->roworiented = r1;
906     a->roworiented    = r2;
907     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
908   }
909 
910   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
911   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
912 
913   /* determine if any processor has disassembled, if so we must
914      also disassemble ourselves, in order that we may reassemble. */
915   /*
916      if nonzero structure of submatrix B cannot change then we know that
917      no processor disassembled thus we can skip this stuff
918   */
919   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
920     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
921     if (mat->was_assembled && !other_disassembled) {
922       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
923     }
924   }
925 
926   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
928   }
929   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
930   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
932 
933 #if defined(PETSC_USE_INFO)
934   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
935     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
936     baij->ht_total_ct  = 0;
937     baij->ht_insert_ct = 0;
938   }
939 #endif
940   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
941     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
942     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
943     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
944   }
945 
946   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
947   baij->rowvalues = 0;
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
953 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
954 {
955   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
956   PetscErrorCode    ierr;
957   PetscMPIInt       size = baij->size,rank = baij->rank;
958   PetscInt          bs = mat->rmap->bs;
959   PetscBool         iascii,isdraw;
960   PetscViewer       sviewer;
961   PetscViewerFormat format;
962 
963   PetscFunctionBegin;
964   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
965   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
966   if (iascii) {
967     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
968     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
969       MatInfo info;
970       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
971       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
981       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
982       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
983       PetscFunctionReturn(0);
984     } else if (format == PETSC_VIEWER_ASCII_INFO) {
985       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
986       PetscFunctionReturn(0);
987     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
988       PetscFunctionReturn(0);
989     }
990   }
991 
992   if (isdraw) {
993     PetscDraw       draw;
994     PetscBool  isnull;
995     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
996     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
997   }
998 
999   if (size == 1) {
1000     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1001     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1002   } else {
1003     /* assemble the entire matrix onto first processor. */
1004     Mat         A;
1005     Mat_SeqBAIJ *Aloc;
1006     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1007     MatScalar   *a;
1008 
1009     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1010     /* Perhaps this should be the type of mat? */
1011     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1012     if (!rank) {
1013       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1014     } else {
1015       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1016     }
1017     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1018     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1019     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1020     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1021 
1022     /* copy over the A part */
1023     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1026 
1027     for (i=0; i<mbs; i++) {
1028       rvals[0] = bs*(baij->rstartbs + i);
1029       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1030       for (j=ai[i]; j<ai[i+1]; j++) {
1031         col = (baij->cstartbs+aj[j])*bs;
1032         for (k=0; k<bs; k++) {
1033           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1034           col++; a += bs;
1035         }
1036       }
1037     }
1038     /* copy over the B part */
1039     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1040     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1041     for (i=0; i<mbs; i++) {
1042       rvals[0] = bs*(baij->rstartbs + i);
1043       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1044       for (j=ai[i]; j<ai[i+1]; j++) {
1045         col = baij->garray[aj[j]]*bs;
1046         for (k=0; k<bs; k++) {
1047           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1048           col++; a += bs;
1049         }
1050       }
1051     }
1052     ierr = PetscFree(rvals);CHKERRQ(ierr);
1053     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1054     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1055     /*
1056        Everyone has to call to draw the matrix since the graphics waits are
1057        synchronized across all processors that share the PetscDraw object
1058     */
1059     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1060     if (!rank) {
1061       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1062     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1063       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1064       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1065     }
1066     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1067     ierr = MatDestroy(&A);CHKERRQ(ierr);
1068   }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1074 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1075 {
1076   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1077   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1078   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1079   PetscErrorCode ierr;
1080   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1081   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1082   int            fd;
1083   PetscScalar    *column_values;
1084   FILE           *file;
1085   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1086   PetscInt       message_count,flowcontrolcount;
1087 
1088   PetscFunctionBegin;
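  /* the binary format written here is: header {MAT_FILE_CLASSID, M, N, total nz},
     then the length of every (scalar) row, then the column indices of every row,
     then the numerical values; process 0 collects each piece from the other
     processes using the viewer flow-control helpers */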
1089   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1090   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1091   nz   = bs2*(A->nz + B->nz);
1092   rlen = mat->rmap->n;
1093   if (!rank) {
1094     header[0] = MAT_FILE_CLASSID;
1095     header[1] = mat->rmap->N;
1096     header[2] = mat->cmap->N;
1097     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1098     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1099     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1100     /* get largest number of rows any processor has */
1101     range = mat->rmap->range;
1102     for (i=1; i<size; i++) {
1103       rlen = PetscMax(rlen,range[i+1] - range[i]);
1104     }
1105   } else {
1106     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1107   }
1108 
1109   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1110   /* compute lengths of each row  */
1111   for (i=0; i<a->mbs; i++) {
1112     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1113   }
1114   /* store the row lengths to the file */
1115   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1116   if (!rank) {
1117     MPI_Status status;
1118     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1119     rlen  = (range[1] - range[0])/bs;
1120     for (i=0; i<rlen; i++) {
1121       for (j=0; j<bs; j++) {
1122         row_lens[i*bs+j] = bs*crow_lens[i];
1123       }
1124     }
1125     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1126     for (i=1; i<size; i++) {
1127       rlen = (range[i+1] - range[i])/bs;
1128       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1129       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1130       for (k=0; k<rlen; k++) {
1131 	for (j=0; j<bs; j++) {
1132 	  row_lens[k*bs+j] = bs*crow_lens[k];
1133 	}
1134       }
1135       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1136     }
1137     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1138     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1139   } else {
1140     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1141     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1142     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1143   }
1144   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1145 
1146   /* load up the local column indices. Indices are stored for every row, not just once per block row, since process 0 does not
1147      have the information needed to expand a block row into its individual rows. This requires more communication, but still
1148      no more than the communication needed for the nonzero values */
1149   nzmax = nz; /* space the largest processor needs */
1150   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1151   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1152   cnt  = 0;
1153   for (i=0; i<a->mbs; i++) {
1154     pcnt = cnt;
1155     for (j=B->i[i]; j<B->i[i+1]; j++) {
1156       if ( (col = garray[B->j[j]]) > cstart) break;
1157       for (l=0; l<bs; l++) {
1158 	column_indices[cnt++] = bs*col+l;
1159       }
1160     }
1161     for (k=A->i[i]; k<A->i[i+1]; k++) {
1162       for (l=0; l<bs; l++) {
1163         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1164       }
1165     }
1166     for (; j<B->i[i+1]; j++) {
1167       for (l=0; l<bs; l++) {
1168         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1169       }
1170     }
1171     len = cnt - pcnt;
1172     for (k=1; k<bs; k++) {
1173       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1174       cnt += len;
1175     }
1176   }
1177   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1178 
1179   /* store the columns to the file */
1180   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1181   if (!rank) {
1182     MPI_Status status;
1183     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1184     for (i=1; i<size; i++) {
1185       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1186       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1187       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1188       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     }
1190     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1191   } else {
1192     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1193     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1194     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1195     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1196   }
1197   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1198 
1199   /* load up the numerical values */
1200   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1201   cnt = 0;
1202   for (i=0; i<a->mbs; i++) {
1203     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1204     for (j=B->i[i]; j<B->i[i+1]; j++) {
1205       if ( garray[B->j[j]] > cstart) break;
1206       for (l=0; l<bs; l++) {
1207         for (ll=0; ll<bs; ll++) {
1208 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1209         }
1210       }
1211       cnt += bs;
1212     }
1213     for (k=A->i[i]; k<A->i[i+1]; k++) {
1214       for (l=0; l<bs; l++) {
1215         for (ll=0; ll<bs; ll++) {
1216           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1217         }
1218       }
1219       cnt += bs;
1220     }
1221     for (; j<B->i[i+1]; j++) {
1222       for (l=0; l<bs; l++) {
1223         for (ll=0; ll<bs; ll++) {
1224 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1225         }
1226       }
1227       cnt += bs;
1228     }
1229     cnt += (bs-1)*rlen;
1230   }
1231   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1232 
1233   /* store the column values to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     MPI_Status status;
1237     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1238     for (i=1; i<size; i++) {
1239       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1240       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1241       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1242       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1243     }
1244     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1245   } else {
1246     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1247     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1248     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1249     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1250   }
1251   ierr = PetscFree(column_values);CHKERRQ(ierr);
1252 
1253   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1254   if (file) {
1255     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatView_MPIBAIJ"
1262 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1263 {
1264   PetscErrorCode ierr;
1265   PetscBool      iascii,isdraw,issocket,isbinary;
1266 
1267   PetscFunctionBegin;
1268   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1269   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1270   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1272   if (iascii || isdraw || issocket) {
1273     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1274   } else if (isbinary) {
1275     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1276   } else {
1277     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1278   }
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1284 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1285 {
1286   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1287   PetscErrorCode ierr;
1288 
1289   PetscFunctionBegin;
1290 #if defined(PETSC_USE_LOG)
1291   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1292 #endif
1293   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1294   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1295   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1296   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1297 #if defined (PETSC_USE_CTABLE)
1298   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1299 #else
1300   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1301 #endif
1302   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1303   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1304   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1305   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1306   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1307   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1308   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1309   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1310 
1311   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1320   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMult_MPIBAIJ"
1326 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330   PetscInt       nt;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1334   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1335   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1336   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
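  /* Overlap communication with computation: begin scattering the ghost entries of xx needed by
     the off-diagonal block B, multiply by the diagonal block A, complete the scatter, and then
     add B's contribution acting on the gathered ghost values. */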
1337   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1339   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1346 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1347 {
1348   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1353   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1354   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1355   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1361 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1362 {
1363   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1364   PetscErrorCode ierr;
1365   PetscBool      merged;
1366 
1367   PetscFunctionBegin;
1368   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1369   /* do nondiagonal part */
1370   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1371   if (!merged) {
1372     /* send it on its way */
1373     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1374     /* do local part */
1375     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1376     /* receive remote parts: note this assumes the values are not actually */
1377     /* inserted in yy until the next line */
1378     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1379   } else {
1380     /* do local part */
1381     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1382     /* send it on its way */
1383     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1384     /* values actually were received in the Begin() but we need to call this nop */
1385     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1392 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1393 {
1394   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1395   PetscErrorCode ierr;
1396 
1397   PetscFunctionBegin;
1398   /* do nondiagonal part */
1399   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1400   /* send it on its way */
1401   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1402   /* do local part */
1403   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1404   /* receive remote parts: note this assumes the values are not actually */
1405   /* inserted in zz until the next line, which is true for this implementation */
1406   /* but may not always be true. */
1407   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 /*
1412   This only works correctly for square matrices where the subblock A->A is the
1413    diagonal block
1414 */
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1417 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1418 {
1419   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1424   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatScale_MPIBAIJ"
1430 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1431 {
1432   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1437   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1443 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1444 {
1445   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1446   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1447   PetscErrorCode ierr;
1448   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1449   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1450   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1451 
1452   PetscFunctionBegin;
1453   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1454   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1455   mat->getrowactive = PETSC_TRUE;
1456 
1457   if (!mat->rowvalues && (idx || v)) {
1458     /*
1459         allocate enough space to hold information from the longest row.
1460     */
1461     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1462     PetscInt     max = 1,mbs = mat->mbs,tmp;
1463     for (i=0; i<mbs; i++) {
1464       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1465       if (max < tmp) { max = tmp; }
1466     }
1467     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1468   }
1469   lrow = row - brstart;
1470 
1471   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1472   if (!v)   {pvA = 0; pvB = 0;}
1473   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1474   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1475   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1476   nztot = nzA + nzB;
1477 
1478   cmap  = mat->garray;
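  /* Merge the two local rows into globally increasing column order: the off-diagonal (B)
     entries whose global block column precedes the diagonal block come first, then all
     diagonal (A) entries, then the remaining B entries; imark marks where B is split. */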
1479   if (v  || idx) {
1480     if (nztot) {
1481       /* Sort by increasing column numbers, assuming A and B already sorted */
1482       PetscInt imark = -1;
1483       if (v) {
1484         *v = v_p = mat->rowvalues;
1485         for (i=0; i<nzB; i++) {
1486           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1487           else break;
1488         }
1489         imark = i;
1490         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1491         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1492       }
1493       if (idx) {
1494         *idx = idx_p = mat->rowindices;
1495         if (imark > -1) {
1496           for (i=0; i<imark; i++) {
1497             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1498           }
1499         } else {
1500           for (i=0; i<nzB; i++) {
1501             if (cmap[cworkB[i]/bs] < cstart)
1502               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1503             else break;
1504           }
1505           imark = i;
1506         }
1507         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1508         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1509       }
1510     } else {
1511       if (idx) *idx = 0;
1512       if (v)   *v   = 0;
1513     }
1514   }
1515   *nz = nztot;
1516   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1517   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1523 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1524 {
1525   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1526 
1527   PetscFunctionBegin;
1528   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1529   baij->getrowactive = PETSC_FALSE;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1535 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1536 {
1537   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1542   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1548 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1549 {
1550   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1551   Mat            A = a->A,B = a->B;
1552   PetscErrorCode ierr;
1553   PetscReal      isend[5],irecv[5];
1554 
1555   PetscFunctionBegin;
1556   info->block_size     = (PetscReal)matin->rmap->bs;
1557   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1558   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1559   isend[3] = info->memory;  isend[4] = info->mallocs;
1560   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1561   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1562   isend[3] += info->memory;  isend[4] += info->mallocs;
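  /* isend[] now holds the combined local statistics of the diagonal and off-diagonal blocks;
     the global flavors reduce these across the communicator with MAX or SUM below. */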
1563   if (flag == MAT_LOCAL) {
1564     info->nz_used      = isend[0];
1565     info->nz_allocated = isend[1];
1566     info->nz_unneeded  = isend[2];
1567     info->memory       = isend[3];
1568     info->mallocs      = isend[4];
1569   } else if (flag == MAT_GLOBAL_MAX) {
1570     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else if (flag == MAT_GLOBAL_SUM) {
1577     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1578     info->nz_used      = irecv[0];
1579     info->nz_allocated = irecv[1];
1580     info->nz_unneeded  = irecv[2];
1581     info->memory       = irecv[3];
1582     info->mallocs      = irecv[4];
1583   } else {
1584     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1585   }
1586   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1587   info->fill_ratio_needed = 0;
1588   info->factor_mallocs    = 0;
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1594 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1595 {
1596   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1597   PetscErrorCode ierr;
1598 
1599   PetscFunctionBegin;
1600   switch (op) {
1601   case MAT_NEW_NONZERO_LOCATIONS:
1602   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1603   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1604   case MAT_KEEP_NONZERO_PATTERN:
1605   case MAT_NEW_NONZERO_LOCATION_ERR:
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1608     break;
1609   case MAT_ROW_ORIENTED:
1610     a->roworiented = flg;
1611     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1612     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1613     break;
1614   case MAT_NEW_DIAGONALS:
1615     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1616     break;
1617   case MAT_IGNORE_OFF_PROC_ENTRIES:
1618     a->donotstash = flg;
1619     break;
1620   case MAT_USE_HASH_TABLE:
1621     a->ht_flag = flg;
1622     break;
1623   case MAT_SYMMETRIC:
1624   case MAT_STRUCTURALLY_SYMMETRIC:
1625   case MAT_HERMITIAN:
1626   case MAT_SYMMETRY_ETERNAL:
1627     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1628     break;
1629   default:
1630     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1637 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1638 {
1639   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1640   Mat_SeqBAIJ    *Aloc;
1641   Mat            B;
1642   PetscErrorCode ierr;
1643   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1644   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1645   MatScalar      *a;
1646 
1647   PetscFunctionBegin;
1648   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1649   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1650     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1651     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1652     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1653     /* Do not know preallocation information, but must set block size */
1654     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1655   } else {
1656     B = *matout;
1657   }
1658 
1659   /* copy over the A part */
1660   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1661   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1662   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
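  /* rvals[] holds the bs global point-row indices of block row i; because BAIJ blocks are
     stored column-major, each pass of the k loop below inserts one point column of a block
     of this matrix as one point row of B, which performs the transpose. */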
1663 
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = (baij->cstartbs+aj[j])*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++; a += bs;
1672       }
1673     }
1674   }
1675   /* copy over the B part */
1676   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1677   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1678   for (i=0; i<mbs; i++) {
1679     rvals[0] = bs*(baij->rstartbs + i);
1680     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1681     for (j=ai[i]; j<ai[i+1]; j++) {
1682       col = baij->garray[aj[j]]*bs;
1683       for (k=0; k<bs; k++) {
1684         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1685         col++; a += bs;
1686       }
1687     }
1688   }
1689   ierr = PetscFree(rvals);CHKERRQ(ierr);
1690   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692 
1693   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1694     *matout = B;
1695   } else {
1696     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1703 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1704 {
1705   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1706   Mat            a = baij->A,b = baij->B;
1707   PetscErrorCode ierr;
1708   PetscInt       s1,s2,s3;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1712   if (rr) {
1713     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1714     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1715     /* Overlap communication with computation. */
1716     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717   }
1718   if (ll) {
1719     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1720     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1721     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1722   }
1723   /* scale  the diagonal block */
1724   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1725 
1726   if (rr) {
1727     /* Do a scatter end and then right scale the off-diagonal block */
1728     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1730   }
1731 
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1737 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1738 {
1739   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1740   PetscErrorCode    ierr;
1741   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1742   PetscInt          i,*owners = A->rmap->range;
1743   PetscInt          *nprocs,j,idx,nsends,row;
1744   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1745   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1746   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1747   MPI_Comm          comm = ((PetscObject)A)->comm;
1748   MPI_Request       *send_waits,*recv_waits;
1749   MPI_Status        recv_status,*send_status;
1750   const PetscScalar *xx;
1751   PetscScalar       *bb;
1752 #if defined(PETSC_DEBUG)
1753   PetscBool         found = PETSC_FALSE;
1754 #endif
1755 
1756   PetscFunctionBegin;
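  /* Rows to be zeroed may be listed on any process: each global row index is first routed to
     its owning process (via nprocs[]/owner[]), the lists are exchanged with nonblocking sends
     and receives, and only then are the locally owned rows actually zeroed. */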
1757   /*  first count number of contributors to each processor */
1758   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1759   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1760   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1761   j = 0;
1762   for (i=0; i<N; i++) {
1763     if (lastidx > (idx = rows[i])) j = 0;
1764     lastidx = idx;
1765     for (; j<size; j++) {
1766       if (idx >= owners[j] && idx < owners[j+1]) {
1767         nprocs[2*j]++;
1768         nprocs[2*j+1] = 1;
1769         owner[i] = j;
1770 #if defined(PETSC_DEBUG)
1771         found = PETSC_TRUE;
1772 #endif
1773         break;
1774       }
1775     }
1776 #if defined(PETSC_DEBUG)
1777     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1778     found = PETSC_FALSE;
1779 #endif
1780   }
1781   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1782 
1783   if (A->nooffproczerorows) {
1784     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1785     nrecvs = nsends;
1786     nmax   = N;
1787   } else {
1788     /* inform other processors of number of messages and max length*/
1789     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1790   }
1791 
1792   /* post receives:   */
1793   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1794   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1795   for (i=0; i<nrecvs; i++) {
1796     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1797   }
1798 
1799   /* do sends:
1800      1) starts[i] gives the starting index in svalues for stuff going to
1801      the ith processor
1802   */
1803   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1804   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1805   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1806   starts[0]  = 0;
1807   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1808   for (i=0; i<N; i++) {
1809     svalues[starts[owner[i]]++] = rows[i];
1810   }
1811 
1812   starts[0] = 0;
1813   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1814   count = 0;
1815   for (i=0; i<size; i++) {
1816     if (nprocs[2*i+1]) {
1817       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1818     }
1819   }
1820   ierr = PetscFree(starts);CHKERRQ(ierr);
1821 
1822   base = owners[rank];
1823 
1824   /*  wait on receives */
1825   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1826   count  = nrecvs;
1827   slen = 0;
1828   while (count) {
1829     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1830     /* unpack receives into our local space */
1831     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1832     source[imdex]  = recv_status.MPI_SOURCE;
1833     lens[imdex]    = n;
1834     slen          += n;
1835     count--;
1836   }
1837   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1838 
1839   /* move the data into the send scatter */
1840   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1841   count = 0;
1842   for (i=0; i<nrecvs; i++) {
1843     values = rvalues + i*nmax;
1844     for (j=0; j<lens[i]; j++) {
1845       lrows[count++] = values[j] - base;
1846     }
1847   }
1848   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1849   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1850   ierr = PetscFree(owner);CHKERRQ(ierr);
1851   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1852 
1853   /* fix right hand side if needed */
1854   if (x && b) {
1855     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1856     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1857     for (i=0; i<slen; i++) {
1858       bb[lrows[i]] = diag*xx[lrows[i]];
1859     }
1860     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1861     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1862   }
1863 
1864   /* actually zap the local rows */
1865   /*
1866         Zero the required rows. If the "diagonal block" of the matrix
1867      is square and the user wishes to set the diagonal, we use separate
1868      code so that MatSetValues() is not called for each diagonal entry,
1869      which would allocate new memory and trigger many mallocs, slowing
1870      things down.
1871   */
1872   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1873   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1874   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1875     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1876   } else if (diag != 0.0) {
1877     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1878     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1879        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1880     for (i=0; i<slen; i++) {
1881       row  = lrows[i] + rstart_bs;
1882       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1883     }
1884     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1886   } else {
1887     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1888   }
1889 
1890   ierr = PetscFree(lrows);CHKERRQ(ierr);
1891 
1892   /* wait on sends */
1893   if (nsends) {
1894     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1895     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1896     ierr = PetscFree(send_status);CHKERRQ(ierr);
1897   }
1898   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1899   ierr = PetscFree(svalues);CHKERRQ(ierr);
1900 
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1906 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1907 {
1908   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1909   PetscErrorCode ierr;
1910 
1911   PetscFunctionBegin;
1912   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatEqual_MPIBAIJ"
1920 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1921 {
1922   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1923   Mat            a,b,c,d;
1924   PetscBool      flg;
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   a = matA->A; b = matA->B;
1929   c = matB->A; d = matB->B;
1930 
1931   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1932   if (flg) {
1933     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1934   }
1935   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatCopy_MPIBAIJ"
1941 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1942 {
1943   PetscErrorCode ierr;
1944   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1945   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1946 
1947   PetscFunctionBegin;
1948   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1949   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1950     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1951   } else {
1952     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1953     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1954   }
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1960 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1961 {
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1971 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1972 {
1973   PetscErrorCode ierr;
1974   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1975   PetscBLASInt   bnz,one=1;
1976   Mat_SeqBAIJ    *x,*y;
1977 
1978   PetscFunctionBegin;
1979   if (str == SAME_NONZERO_PATTERN) {
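    /* With identical nonzero patterns the stored value arrays of X and Y line up entry for
       entry, so the update reduces to one BLAS axpy on the diagonal block and one on the
       off-diagonal block. */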
1980     PetscScalar alpha = a;
1981     x = (Mat_SeqBAIJ *)xx->A->data;
1982     y = (Mat_SeqBAIJ *)yy->A->data;
1983     bnz = PetscBLASIntCast(x->nz);
1984     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1985     x = (Mat_SeqBAIJ *)xx->B->data;
1986     y = (Mat_SeqBAIJ *)yy->B->data;
1987     bnz = PetscBLASIntCast(x->nz);
1988     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1989   } else {
1990     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1991   }
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1997 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1998 {
1999   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2000   PetscErrorCode ierr;
2001 
2002   PetscFunctionBegin;
2003   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2004   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2010 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2011 {
2012   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2013   PetscErrorCode ierr;
2014 
2015   PetscFunctionBegin;
2016   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2017   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2023 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2024 {
2025   PetscErrorCode ierr;
2026   IS             iscol_local;
2027   PetscInt       csize;
2028 
2029   PetscFunctionBegin;
2030   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
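  /* Every process needs the full set of requested columns to extract complete matrix columns,
     so iscol is gathered onto each process; on reuse the previously gathered IS is retrieved
     from the composed "ISAllGather" object instead of being rebuilt. */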
2031   if (call == MAT_REUSE_MATRIX) {
2032     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2033     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2034   } else {
2035     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2036   }
2037   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2038   if (call == MAT_INITIAL_MATRIX) {
2039     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2040     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2047 /*
2048   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2049   and then the end result obtained by concatenating the local matrices.
2050   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2051 */
2052 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2053 {
2054   PetscErrorCode ierr;
2055   PetscMPIInt    rank,size;
2056   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2057   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2058   Mat            M,Mreuse;
2059   MatScalar      *vwork,*aa;
2060   MPI_Comm       comm = ((PetscObject)mat)->comm;
2061   IS             isrow_new, iscol_new;
2062   PetscBool      idflag,allrows, allcols;
2063   Mat_SeqBAIJ    *aij;
2064 
2065 
2066   PetscFunctionBegin;
2067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069   /* The compression and expansion should be avoided. It does not flag
2070      errors and might change the indices, hence is potentially buggy */
2071   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2072   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2073 
2074   /* Check for special case: each processor gets entire matrix columns */
2075   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2076   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2077   if (idflag && ncol == mat->cmap->N){
2078     allcols = PETSC_TRUE;
2079   } else {
2080     allcols = PETSC_FALSE;
2081   }
2082 
2083   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2084   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2085   if (idflag && nrow == mat->rmap->N){
2086     allrows = PETSC_TRUE;
2087   } else {
2088     allrows = PETSC_FALSE;
2089   }
2090   if (call ==  MAT_REUSE_MATRIX) {
2091     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2092     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2093     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2094   } else {
2095     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2096   }
2097   ierr = ISDestroy(&isrow_new); CHKERRQ(ierr);
2098   ierr = ISDestroy(&iscol_new); CHKERRQ(ierr);
2099   /*
2100       m - number of local rows
2101       n - number of columns (same on all processors)
2102       rstart - first row in new global matrix generated
2103   */
2104   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2105   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2106   m    = m/bs;
2107   n    = n/bs;
2108 
2109   if (call == MAT_INITIAL_MATRIX) {
2110     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2111     ii  = aij->i;
2112     jj  = aij->j;
2113 
2114     /*
2115         Determine the number of non-zeros in the diagonal and off-diagonal
2116         portions of the matrix in order to do correct preallocation
2117     */
2118 
2119     /* first get start and end of "diagonal" columns */
2120     if (csize == PETSC_DECIDE) {
2121       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2122       if (mglobal == n*bs) { /* square matrix */
2123 	nlocal = m;
2124       } else {
2125         nlocal = n/size + ((n % size) > rank);
2126       }
2127     } else {
2128       nlocal = csize/bs;
2129     }
2130     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2131     rstart = rend - nlocal;
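    /* The inclusive MPI_Scan of nlocal gives rend = one past this process's last owned column
       block in the new layout; rstart then follows by subtracting the local count. */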
2132     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2133 
2134     /* next, compute all the lengths */
2135     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2136     olens = dlens + m;
2137     for (i=0; i<m; i++) {
2138       jend = ii[i+1] - ii[i];
2139       olen = 0;
2140       dlen = 0;
2141       for (j=0; j<jend; j++) {
2142         if (*jj < rstart || *jj >= rend) olen++;
2143         else dlen++;
2144         jj++;
2145       }
2146       olens[i] = olen;
2147       dlens[i] = dlen;
2148     }
2149     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2150     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2151     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2152     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2153     ierr = PetscFree(dlens);CHKERRQ(ierr);
2154   } else {
2155     PetscInt ml,nl;
2156 
2157     M = *newmat;
2158     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2159     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2160     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2161     /*
2162          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2163        rather than the slower MatSetValues().
2164     */
2165     M->was_assembled = PETSC_TRUE;
2166     M->assembled     = PETSC_FALSE;
2167   }
2168   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2169   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2170   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2171   ii  = aij->i;
2172   jj  = aij->j;
2173   aa  = aij->a;
2174   for (i=0; i<m; i++) {
2175     row   = rstart/bs + i;
2176     nz    = ii[i+1] - ii[i];
2177     cwork = jj;     jj += nz;
2178     vwork = aa;     aa += nz;
2179     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2180   }
2181 
2182   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   *newmat = M;
2185 
2186   /* save submatrix used in processor for next request */
2187   if (call ==  MAT_INITIAL_MATRIX) {
2188     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2189     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2190   }
2191 
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatPermute_MPIBAIJ"
2197 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2198 {
2199   MPI_Comm       comm,pcomm;
2200   PetscInt       first,local_size,nrows;
2201   const PetscInt *rows;
2202   PetscMPIInt    size;
2203   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2208   /* make a collective version of 'rowp' */
2209   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2210   if (pcomm==comm) {
2211     crowp = rowp;
2212   } else {
2213     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2214     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2215     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2216     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2217   }
2218   /* collect the global row permutation and invert it */
2219   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2220   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2221   if (pcomm!=comm) {
2222     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2223   }
2224   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2225   /* get the local target indices */
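  /* Entries [first, first+local_size) of the inverted global permutation list the original
     rows that land in this process's portion of the permuted matrix; they become the row
     index set used for the submatrix extraction below. */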
2226   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2227   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2228   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2229   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2230   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2231   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2232   /* the column permutation is so much easier;
2233      make a local version of 'colp' and invert it */
2234   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2235   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2236   if (size==1) {
2237     lcolp = colp;
2238   } else {
2239     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2240     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2241     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2242   }
2243   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2244   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2245   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2246   if (size>1) {
2247     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2248     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2249   }
2250   /* now we just get the submatrix */
2251   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2252   /* clean up */
2253   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2254   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2260 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2261 {
2262   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2263   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2264 
2265   PetscFunctionBegin;
2266   if (nghosts) { *nghosts = B->nbs;}
2267   if (ghosts) {*ghosts = baij->garray;}
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2275 /*
2276     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2277 */
2278 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2279 {
2280   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2281   PetscErrorCode        ierr;
2282   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2283   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2284   const PetscInt        *is;
2285   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2286   PetscInt              *rowhit,M,cstart,cend,colb;
2287   PetscInt              *columnsforrow,l;
2288   IS                    *isa;
2289   PetscBool              done,flg;
2290   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2291   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2292 
2293   PetscFunctionBegin;
2294   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2295   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2296 
2297   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2298   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2299   M                = mat->rmap->n/bs;
2300   cstart           = mat->cmap->rstart/bs;
2301   cend             = mat->cmap->rend/bs;
2302   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2303   c->N             = mat->cmap->N/bs;
2304   c->m             = mat->rmap->n/bs;
2305   c->rstart        = mat->rmap->rstart/bs;
2306 
2307   c->ncolors       = nis;
2308   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2309   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2310   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2311   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2312   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2313   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2314 
2315   /* Allow access to data structures of local part of matrix */
2316   if (!baij->colmap) {
2317     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2318   }
2319   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2320   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2321 
2322   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2323   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2324 
2325   for (i=0; i<nis; i++) {
2326     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2327     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2328     c->ncolumns[i] = n;
2329     if (n) {
2330       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2331       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2332       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2333     } else {
2334       c->columns[i]  = 0;
2335     }
2336 
2337     if (ctype == IS_COLORING_GLOBAL){
2338       /* Determine the total (parallel) number of columns of this color */
2339       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2340       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2341 
2342       nn   = PetscMPIIntCast(n);
2343       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2344       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2345       if (!nctot) {
2346         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2347       }
2348 
2349       disp[0] = 0;
2350       for (j=1; j<size; j++) {
2351         disp[j] = disp[j-1] + ncolsonproc[j-1];
2352       }
2353 
2354       /* Get complete list of columns for color on each processor */
2355       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2356       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2357       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2358     } else if (ctype == IS_COLORING_GHOSTED){
2359       /* Determine local number of columns of this color on this process, including ghost points */
2360       nctot = n;
2361       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2362       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2363     } else {
2364       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2365     }
2366 
2367     /*
2368        Mark all rows affected by these columns
2369     */
2370     /* Temporary option to allow for debugging/testing */
2371     flg  = PETSC_FALSE;
2372     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2373     if (!flg) {/*-----------------------------------------------------------------------------*/
2374       /* crude, fast version */
2375       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
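      /* rowhit[r] records 1 + the global column of this color that touches block row r;
         a zero entry means block row r is not perturbed by this color. */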
2376       /* loop over columns*/
2377       for (j=0; j<nctot; j++) {
2378         if (ctype == IS_COLORING_GHOSTED) {
2379           col = ltog[cols[j]];
2380         } else {
2381           col  = cols[j];
2382         }
2383         if (col >= cstart && col < cend) {
2384           /* column is in diagonal block of matrix */
2385           rows = A_cj + A_ci[col-cstart];
2386           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2387         } else {
2388 #if defined (PETSC_USE_CTABLE)
2389           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2390 	  colb --;
2391 #else
2392           colb = baij->colmap[col] - 1;
2393 #endif
2394           if (colb == -1) {
2395             m = 0;
2396           } else {
2397             colb = colb/bs;
2398             rows = B_cj + B_ci[colb];
2399             m    = B_ci[colb+1] - B_ci[colb];
2400           }
2401         }
2402         /* loop over columns marking them in rowhit */
2403         for (k=0; k<m; k++) {
2404           rowhit[*rows++] = col + 1;
2405         }
2406       }
2407 
2408       /* count the number of hits */
2409       nrows = 0;
2410       for (j=0; j<M; j++) {
2411         if (rowhit[j]) nrows++;
2412       }
2413       c->nrows[i]         = nrows;
2414       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2415       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2416       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2417       nrows = 0;
2418       for (j=0; j<M; j++) {
2419         if (rowhit[j]) {
2420           c->rows[i][nrows]           = j;
2421           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2422           nrows++;
2423         }
2424       }
2425     } else {/*-------------------------------------------------------------------------------*/
2426       /* slow version, using rowhit as a linked list */
2427       PetscInt currentcol,fm,mfm;
2428       rowhit[M] = M;
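      /* rowhit[M] = M is the list terminator; rowhit[] is used as a singly linked list of the
         marked rows kept in increasing order, so they can be read out sorted afterwards. */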
2429       nrows     = 0;
2430       /* loop over columns*/
2431       for (j=0; j<nctot; j++) {
2432         if (ctype == IS_COLORING_GHOSTED) {
2433           col = ltog[cols[j]];
2434         } else {
2435           col  = cols[j];
2436         }
2437         if (col >= cstart && col < cend) {
2438           /* column is in diagonal block of matrix */
2439           rows = A_cj + A_ci[col-cstart];
2440           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2441         } else {
2442 #if defined (PETSC_USE_CTABLE)
2443           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2444           colb --;
2445 #else
2446           colb = baij->colmap[col] - 1;
2447 #endif
2448           if (colb == -1) {
2449             m = 0;
2450           } else {
2451             colb = colb/bs;
2452             rows = B_cj + B_ci[colb];
2453             m    = B_ci[colb+1] - B_ci[colb];
2454           }
2455         }
2456 
2457         /* loop over columns marking them in rowhit */
2458         fm    = M; /* fm points to first entry in linked list */
2459         for (k=0; k<m; k++) {
2460           currentcol = *rows++;
2461 	  /* is it already in the list? */
2462           do {
2463             mfm  = fm;
2464             fm   = rowhit[fm];
2465           } while (fm < currentcol);
2466           /* not in list so add it */
2467           if (fm != currentcol) {
2468             nrows++;
2469             columnsforrow[currentcol] = col;
2470             /* next three lines insert new entry into linked list */
2471             rowhit[mfm]               = currentcol;
2472             rowhit[currentcol]        = fm;
2473             fm                        = currentcol;
2474             /* fm points to present position in list since we know the columns are sorted */
2475           } else {
2476             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2477           }
2478         }
2479       }
2480       c->nrows[i]         = nrows;
2481       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2482       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2483       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2484       /* now store the linked list of rows into c->rows[i] */
2485       nrows = 0;
2486       fm    = rowhit[M];
2487       do {
2488         c->rows[i][nrows]            = fm;
2489         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2490         fm                           = rowhit[fm];
2491       } while (fm < M);
2492     } /* ---------------------------------------------------------------------------------------*/
2493     ierr = PetscFree(cols);CHKERRQ(ierr);
2494   }
2495 
2496   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2497   /*
2498        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2499   */
2500   if (ctype == IS_COLORING_GLOBAL) {
2501     PetscInt *garray;
2502     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
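    /* Expand the block ghost map baij->garray (one entry per ghost block column) into point
       indices so that VecCreateGhost() below receives the ghost points of the column space. */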
2503     for (i=0; i<baij->B->cmap->n/bs; i++) {
2504       for (j=0; j<bs; j++) {
2505         garray[i*bs+j] = bs*baij->garray[i]+j;
2506       }
2507     }
2508     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2509     ierr = PetscFree(garray);CHKERRQ(ierr);
2510     CHKMEMQ;
2511     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2512     for (k=0; k<c->ncolors; k++) {
2513       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2514       for (l=0; l<c->nrows[k]; l++) {
2515         col = c->columnsforrow[k][l];
2516         if (col >= cstart && col < cend) {
2517           /* column is in diagonal block of matrix */
2518           colb = col - cstart;
2519         } else {
2520           /* column  is in "off-processor" part */
2521 #if defined (PETSC_USE_CTABLE)
2522           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2523           colb --;
2524 #else
2525           colb = baij->colmap[col] - 1;
2526 #endif
2527           colb = colb/bs;
2528           colb += cend - cstart;
2529         }
2530         c->vscaleforrow[k][l] = colb;
2531       }
2532     }
2533   } else if (ctype == IS_COLORING_GHOSTED) {
2534     /* Get gtol mapping */
2535     PetscInt N = mat->cmap->N, *gtol;
2536     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2537     for (i=0; i<N; i++) gtol[i] = -1;
2538     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2539 
2540     c->vscale = 0; /* will be created in MatFDColoringApply() */
2541     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2542     for (k=0; k<c->ncolors; k++) {
2543       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2544       for (l=0; l<c->nrows[k]; l++) {
2545         col = c->columnsforrow[k][l];      /* global column index */
2546         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2547       }
2548     }
2549     ierr = PetscFree(gtol);CHKERRQ(ierr);
2550   }
2551   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2552 
2553   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2554   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2555   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2556   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2557   CHKMEMQ;
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2563 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2564 {
2565   Mat            B;
2566   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2567   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2568   Mat_SeqAIJ     *b;
2569   PetscErrorCode ierr;
2570   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2571   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2572   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2573 
2574   PetscFunctionBegin;
2575   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2576   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2577 
2578   /* ----------------------------------------------------------------
2579      Tell every processor the number of nonzeros per row
2580   */
2581   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2582   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2583     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2584   }
2585   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2586   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2587   displs     = recvcounts + size;
2588   for (i=0; i<size; i++) {
2589     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2590     displs[i]     = A->rmap->range[i]/bs;
2591   }
2592 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2593   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2594 #else
2595   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2596 #endif
2597   /* ---------------------------------------------------------------
2598      Create the sequential matrix of the same type as the local block diagonal
2599   */
2600   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2601   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2602   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2603   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2604   b = (Mat_SeqAIJ *)B->data;
2605 
2606   /*--------------------------------------------------------------------
2607     Copy my part of matrix column indices over
2608   */
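  /* jsendbuf points at this process's slice of b->j; the compressed local column indices of
     the off-diagonal part are mapped back to global block indices through garray, while the
     diagonal part is offset by the first owned block row, so each row is emitted in global,
     sorted order. */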
2609   sendcount  = ad->nz + bd->nz;
2610   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2611   a_jsendbuf = ad->j;
2612   b_jsendbuf = bd->j;
2613   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2614   cnt        = 0;
2615   for (i=0; i<n; i++) {
2616 
2617     /* put in lower diagonal portion */
2618     m = bd->i[i+1] - bd->i[i];
2619     while (m > 0) {
2620       /* is it above diagonal (in bd (compressed) numbering) */
2621       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2622       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2623       m--;
2624     }
2625 
2626     /* put in diagonal portion */
2627     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2628       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2629     }
2630 
2631     /* put in upper diagonal portion */
2632     while (m-- > 0) {
2633       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2634     }
2635   }
2636   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2637 
2638   /*--------------------------------------------------------------------
2639     Gather all column indices to all processors
2640   */
2641   for (i=0; i<size; i++) {
2642     recvcounts[i] = 0;
2643     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2644       recvcounts[i] += lens[j];
2645     }
2646   }
2647   displs[0]  = 0;
2648   for (i=1; i<size; i++) {
2649     displs[i] = displs[i-1] + recvcounts[i-1];
2650   }
2651 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2652   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2653 #else
2654   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2655 #endif
2656   /*--------------------------------------------------------------------
2657     Assemble the matrix into usable form (note that numerical values are not yet set)
2658   */
2659   /* set the b->ilen (length of each row) values */
2660   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2661   /* set the b->i indices */
2662   b->i[0] = 0;
2663   for (i=1; i<=A->rmap->N/bs; i++) {
2664     b->i[i] = b->i[i-1] + lens[i-1];
2665   }
2666   ierr = PetscFree(lens);CHKERRQ(ierr);
2667   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2668   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2669   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2670 
2671   if (A->symmetric){
2672     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2673   } else if (A->hermitian) {
2674     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2675   } else if (A->structurally_symmetric) {
2676     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2677   }
2678   *newmat = B;
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatSOR_MPIBAIJ"
2684 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2685 {
2686   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2687   PetscErrorCode ierr;
2688   Vec            bb1 = 0;
2689 
2690   PetscFunctionBegin;
2691   if (flag == SOR_APPLY_UPPER) {
2692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2693     PetscFunctionReturn(0);
2694   }
2695 
2696   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2697     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2698   }
2699 
2700   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2701     if (flag & SOR_ZERO_INITIAL_GUESS) {
2702       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2703       its--;
2704     }
2705 
2706     while (its--) {
2707       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2708       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2709 
2710       /* update rhs: bb1 = bb - B*x */
2711       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2712       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2713 
2714       /* local sweep */
2715       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2716     }
2717   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2718     if (flag & SOR_ZERO_INITIAL_GUESS) {
2719       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2720       its--;
2721     }
2722     while (its--) {
2723       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2724       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2725 
2726       /* update rhs: bb1 = bb - B*x */
2727       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2728       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2729 
2730       /* local sweep */
2731       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2732     }
2733   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2734     if (flag & SOR_ZERO_INITIAL_GUESS) {
2735       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2736       its--;
2737     }
2738     while (its--) {
2739       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2740       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2741 
2742       /* update rhs: bb1 = bb - B*x */
2743       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2744       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2745 
2746       /* local sweep */
2747       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2748     }
2749   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2750 
2751   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 
2755 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2756 
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2759 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2760 {
2761   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2762   PetscErrorCode ierr;
2763 
2764   PetscFunctionBegin;
2765   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 
2770 /* -------------------------------------------------------------------*/
2771 static struct _MatOps MatOps_Values = {
2772        MatSetValues_MPIBAIJ,
2773        MatGetRow_MPIBAIJ,
2774        MatRestoreRow_MPIBAIJ,
2775        MatMult_MPIBAIJ,
2776 /* 4*/ MatMultAdd_MPIBAIJ,
2777        MatMultTranspose_MPIBAIJ,
2778        MatMultTransposeAdd_MPIBAIJ,
2779        0,
2780        0,
2781        0,
2782 /*10*/ 0,
2783        0,
2784        0,
2785        MatSOR_MPIBAIJ,
2786        MatTranspose_MPIBAIJ,
2787 /*15*/ MatGetInfo_MPIBAIJ,
2788        MatEqual_MPIBAIJ,
2789        MatGetDiagonal_MPIBAIJ,
2790        MatDiagonalScale_MPIBAIJ,
2791        MatNorm_MPIBAIJ,
2792 /*20*/ MatAssemblyBegin_MPIBAIJ,
2793        MatAssemblyEnd_MPIBAIJ,
2794        MatSetOption_MPIBAIJ,
2795        MatZeroEntries_MPIBAIJ,
2796 /*24*/ MatZeroRows_MPIBAIJ,
2797        0,
2798        0,
2799        0,
2800        0,
2801 /*29*/ MatSetUp_MPIBAIJ,
2802        0,
2803        0,
2804        0,
2805        0,
2806 /*34*/ MatDuplicate_MPIBAIJ,
2807        0,
2808        0,
2809        0,
2810        0,
2811 /*39*/ MatAXPY_MPIBAIJ,
2812        MatGetSubMatrices_MPIBAIJ,
2813        MatIncreaseOverlap_MPIBAIJ,
2814        MatGetValues_MPIBAIJ,
2815        MatCopy_MPIBAIJ,
2816 /*44*/ 0,
2817        MatScale_MPIBAIJ,
2818        0,
2819        0,
2820        0,
2821 /*49*/ 0,
2822        0,
2823        0,
2824        0,
2825        0,
2826 /*54*/ MatFDColoringCreate_MPIBAIJ,
2827        0,
2828        MatSetUnfactored_MPIBAIJ,
2829        MatPermute_MPIBAIJ,
2830        MatSetValuesBlocked_MPIBAIJ,
2831 /*59*/ MatGetSubMatrix_MPIBAIJ,
2832        MatDestroy_MPIBAIJ,
2833        MatView_MPIBAIJ,
2834        0,
2835        0,
2836 /*64*/ 0,
2837        0,
2838        0,
2839        0,
2840        0,
2841 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2842        0,
2843        0,
2844        0,
2845        0,
2846 /*74*/ 0,
2847        MatFDColoringApply_BAIJ,
2848        0,
2849        0,
2850        0,
2851 /*79*/ 0,
2852        0,
2853        0,
2854        0,
2855        MatLoad_MPIBAIJ,
2856 /*84*/ 0,
2857        0,
2858        0,
2859        0,
2860        0,
2861 /*89*/ 0,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*94*/ 0,
2867        0,
2868        0,
2869        0,
2870        0,
2871 /*99*/ 0,
2872        0,
2873        0,
2874        0,
2875        0,
2876 /*104*/0,
2877        MatRealPart_MPIBAIJ,
2878        MatImaginaryPart_MPIBAIJ,
2879        0,
2880        0,
2881 /*109*/0,
2882        0,
2883        0,
2884        0,
2885        0,
2886 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2887        0,
2888        MatGetGhosts_MPIBAIJ,
2889        0,
2890        0,
2891 /*119*/0,
2892        0,
2893        0,
2894        0,
2895        0,
2896 /*124*/0,
2897        0,
2898        MatInvertBlockDiagonal_MPIBAIJ
2899 };
2900 
2901 EXTERN_C_BEGIN
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2904 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2905 {
2906   PetscFunctionBegin;
2907   *a = ((Mat_MPIBAIJ *)A->data)->A;
2908   PetscFunctionReturn(0);
2909 }
2910 EXTERN_C_END
2911 
2912 EXTERN_C_BEGIN
2913 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2914 EXTERN_C_END
2915 
2916 EXTERN_C_BEGIN
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2919 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2920 {
2921   PetscInt       m,rstart,cstart,cend;
2922   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2923   const PetscInt *JJ=0;
2924   PetscScalar    *values=0;
2925   PetscErrorCode ierr;
2926 
2927   PetscFunctionBegin;
2928   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2929   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2930   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2931   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2932   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2933   m      = B->rmap->n/bs;
2934   rstart = B->rmap->rstart/bs;
2935   cstart = B->cmap->rstart/bs;
2936   cend   = B->cmap->rend/bs;
2937 
2938   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2939   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2940   for (i=0; i<m; i++) {
2941     nz = ii[i+1] - ii[i];
2942     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2943     nz_max = PetscMax(nz_max,nz);
2944     JJ  = jj + ii[i];
2945     for (j=0; j<nz; j++) {
2946       if (*JJ >= cstart) break;
2947       JJ++;
2948     }
2949     d = 0;
2950     for (; j<nz; j++) {
2951       if (*JJ++ >= cend) break;
2952       d++;
2953     }
2954     d_nnz[i] = d;
2955     o_nnz[i] = nz - d;
2956   }
2957   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2958   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2959 
2960   values = (PetscScalar*)V;
2961   if (!values) {
2962     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2963     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2964   }
2965   for (i=0; i<m; i++) {
2966     PetscInt          row    = i + rstart;
2967     PetscInt          ncols  = ii[i+1] - ii[i];
2968     const PetscInt    *icols = jj + ii[i];
2969     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2970     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2971   }
2972 
2973   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 EXTERN_C_END
2980 
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2983 /*@C
2984    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2985    (block compressed row).
2986 
2987    Collective on MPI_Comm
2988 
2989    Input Parameters:
2990 +  A - the matrix
2991 .  bs - the block size
2992 .  i - the indices into j for the start of each local row (starts with zero)
2993 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2994 -  v - optional values in the matrix
2995 
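   Example (a minimal sketch; the block size and CSR arrays below are illustrative only and
   assume this process owns two block rows of a matrix with at least three global block columns;
   pass PETSC_NULL for v and insert the numerical values later with MatSetValuesBlocked()):
.vb
   Mat            A;
   PetscErrorCode ierr;
   PetscInt       bs   = 2;
   PetscInt       ii[] = {0,2,3};   /* block row 0 has 2 blocks, block row 1 has 1 block */
   PetscInt       jj[] = {0,2,1};   /* global block column indices, sorted within each row */

   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
.ve
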
2996    Level: developer
2997 
2998 .keywords: matrix, block, aij, compressed row, sparse, parallel
2999 
3000 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
3001 @*/
3002 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3003 {
3004   PetscErrorCode ierr;
3005 
3006   PetscFunctionBegin;
3007   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3008   PetscValidType(B,1);
3009   PetscValidLogicalCollectiveInt(B,bs,2);
3010   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 EXTERN_C_BEGIN
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3017 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3018 {
3019   Mat_MPIBAIJ    *b;
3020   PetscErrorCode ierr;
3021   PetscInt       i;
3022   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3023 
3024   PetscFunctionBegin;
3025   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3026   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3027 
3028   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3029   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3030   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3031   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3032 
3033   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3034   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3035   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3036   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3037   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3038 
3039   if (d_nnz) {
3040     for (i=0; i<B->rmap->n/bs; i++) {
3041       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3042     }
3043   }
3044   if (o_nnz) {
3045     for (i=0; i<B->rmap->n/bs; i++) {
3046       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3047     }
3048   }
3049 
3050   b = (Mat_MPIBAIJ*)B->data;
3051   b->bs2 = bs*bs;
3052   b->mbs = B->rmap->n/bs;
3053   b->nbs = B->cmap->n/bs;
3054   b->Mbs = B->rmap->N/bs;
3055   b->Nbs = B->cmap->N/bs;
3056 
3057   for (i=0; i<=b->size; i++) {
3058     b->rangebs[i] = B->rmap->range[i]/bs;
3059   }
3060   b->rstartbs = B->rmap->rstart/bs;
3061   b->rendbs   = B->rmap->rend/bs;
3062   b->cstartbs = B->cmap->rstart/bs;
3063   b->cendbs   = B->cmap->rend/bs;
3064 
3065   if (!B->preallocated) {
3066     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3067     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3068     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3069     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3070     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3071     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3072     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3073     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3074     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3075   }
3076 
3077   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3078   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3079   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3080   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3081   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3082   B->preallocated = PETSC_TRUE;
3083   PetscFunctionReturn(0);
3084 }
3085 EXTERN_C_END
3086 
3087 EXTERN_C_BEGIN
3088 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3089 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3090 EXTERN_C_END
3091 
3092 
3093 EXTERN_C_BEGIN
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3096 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3097 {
3098   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3099   PetscErrorCode ierr;
3100   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3101   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3102   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3103 
3104   PetscFunctionBegin;
3105   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3106   ii[0] = 0;
3107   CHKMEMQ;
3108   for (i=0; i<M; i++) {
3109     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3110     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3111     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3112     /* remove one from the count if this row contains a diagonal entry */
3113     for (j=id[i]; j<id[i+1]; j++) {
3114       if (jd[j] == i) {ii[i+1]--;break;}
3115     }
3116   CHKMEMQ;
3117   }
3118   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3119   cnt = 0;
3120   for (i=0; i<M; i++) {
3121     for (j=io[i]; j<io[i+1]; j++) {
3122       if (garray[jo[j]] > rstart) break;
3123       jj[cnt++] = garray[jo[j]];
3124   CHKMEMQ;
3125     }
3126     for (k=id[i]; k<id[i+1]; k++) {
3127       if (jd[k] != i) {
3128         jj[cnt++] = rstart + jd[k];
3129   CHKMEMQ;
3130       }
3131     }
3132     for (;j<io[i+1]; j++) {
3133       jj[cnt++] = garray[jo[j]];
3134   CHKMEMQ;
3135     }
3136   }
3137   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3138   PetscFunctionReturn(0);
3139 }
3140 EXTERN_C_END
3141 
3142 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3143 EXTERN_C_BEGIN
3144 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3150 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3151 {
3152   PetscErrorCode ierr;
3153   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3154   Mat            B;
3155   Mat_MPIAIJ     *b;
3156 
3157   PetscFunctionBegin;
3158   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3159 
3160   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3161   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3162   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3163   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3164   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3165   b = (Mat_MPIAIJ*) B->data;
3166 
3167   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3168   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3169   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3170   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3171   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3172   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3173   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3174   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3175   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3176   if (reuse == MAT_REUSE_MATRIX) {
3177     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3178   } else {
3179    *newmat = B;
3180   }
3181   PetscFunctionReturn(0);
3182 }
3183 EXTERN_C_END
3184 
3185 EXTERN_C_BEGIN
3186 #if defined(PETSC_HAVE_MUMPS)
3187 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3188 #endif
3189 EXTERN_C_END
3190 
3191 /*MC
3192    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3193 
3194    Options Database Keys:
3195 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3196 . -mat_block_size <bs> - set the blocksize used to store the matrix
3197 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3198 
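  Example (a sketch only; m, n, M, N, bs, d_nz and o_nz are placeholders and error checking is omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);   /* or select it at run time with -mat_type mpibaij and MatSetFromOptions() */
   MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
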
3199   Level: beginner
3200 
3201 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3202 M*/
3203 
3204 EXTERN_C_BEGIN
3205 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3206 EXTERN_C_END
3207 
3208 EXTERN_C_BEGIN
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatCreate_MPIBAIJ"
3211 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3212 {
3213   Mat_MPIBAIJ    *b;
3214   PetscErrorCode ierr;
3215   PetscBool      flg;
3216 
3217   PetscFunctionBegin;
3218   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3219   B->data = (void*)b;
3220 
3221   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3222   B->assembled  = PETSC_FALSE;
3223 
3224   B->insertmode = NOT_SET_VALUES;
3225   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3226   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3227 
3228   /* build local table of row and column ownerships */
3229   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3230 
3231   /* build cache for off array entries formed */
3232   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3233   b->donotstash  = PETSC_FALSE;
3234   b->colmap      = PETSC_NULL;
3235   b->garray      = PETSC_NULL;
3236   b->roworiented = PETSC_TRUE;
3237 
3238   /* stuff used in block assembly */
3239   b->barray       = 0;
3240 
3241   /* stuff used for matrix vector multiply */
3242   b->lvec         = 0;
3243   b->Mvctx        = 0;
3244 
3245   /* stuff for MatGetRow() */
3246   b->rowindices   = 0;
3247   b->rowvalues    = 0;
3248   b->getrowactive = PETSC_FALSE;
3249 
3250   /* hash table stuff */
3251   b->ht           = 0;
3252   b->hd           = 0;
3253   b->ht_size      = 0;
3254   b->ht_flag      = PETSC_FALSE;
3255   b->ht_fact      = 0;
3256   b->ht_total_ct  = 0;
3257   b->ht_insert_ct = 0;
3258 
3259   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3260   b->ijonly       = PETSC_FALSE;
3261 
3262   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3263     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3264     if (flg) {
3265       PetscReal fact = 1.39;
3266       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3267       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3268       if (fact <= 1.0) fact = 1.39;
3269       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3270       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3271     }
3272   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3273 
3274 #if defined(PETSC_HAVE_MUMPS)
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3276 #endif
3277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3278                                      "MatConvert_MPIBAIJ_MPIAdj",
3279                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3281                                      "MatConvert_MPIBAIJ_MPIAIJ",
3282                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3284                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3285                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3287                                      "MatStoreValues_MPIBAIJ",
3288                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3290                                      "MatRetrieveValues_MPIBAIJ",
3291                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3293                                      "MatGetDiagonalBlock_MPIBAIJ",
3294                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3296                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3297                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3299 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3300 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3302                                      "MatDiagonalScaleLocal_MPIBAIJ",
3303                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3305                                      "MatSetHashTableFactor_MPIBAIJ",
3306                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3308                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3309                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3310   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3311   PetscFunctionReturn(0);
3312 }
3313 EXTERN_C_END
3314 
3315 /*MC
3316    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3317 
3318    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3319    and MATMPIBAIJ otherwise.
3320 
3321    Options Database Keys:
3322 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3323 
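  Example (a sketch only; sizes and preallocation counts are placeholders and error checking is omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATBAIJ);      /* becomes MATSEQBAIJ on one process, MATMPIBAIJ otherwise */
   MatSeqBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL);                  /* no-op for the MPI case */
   MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);  /* no-op for the sequential case */
.ve
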
3324   Level: beginner
3325 
3326 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3327 M*/
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3331 /*@C
3332    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3333    (block compressed row).  For good matrix assembly performance
3334    the user should preallocate the matrix storage by setting the parameters
3335    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3336    performance can be increased by more than a factor of 50.
3337 
3338    Collective on Mat
3339 
3340    Input Parameters:
3341 +  A - the matrix
3342 .  bs   - size of block
3343 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3344            submatrix  (same for all local rows)
3345 .  d_nnz - array containing the number of block nonzeros in the various block rows
3346            of the diagonal portion of the local submatrix (possibly different for each block
3347            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3348            set it even if it is zero.
3349 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3350            submatrix (same for all local rows).
3351 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3352            off-diagonal portion of the local submatrix (possibly different for
3353            each block row) or PETSC_NULL.
3354 
3355    If the *_nnz parameter is given then the *_nz parameter is ignored
3356 
3357    Options Database Keys:
3358 +   -mat_block_size - size of the blocks to use
3359 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3360 
3361    Notes:
3362    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3363    then it must be used on all processors that share the object for that argument.
3364 
3365    Storage Information:
3366    For a square global matrix we define each processor's diagonal portion
3367    to be its local rows and the corresponding columns (a square submatrix);
3368    each processor's off-diagonal portion encompasses the remainder of the
3369    local matrix (a rectangular submatrix).
3370 
3371    The user can specify preallocated storage for the diagonal part of
3372    the local submatrix with either d_nz or d_nnz (not both).  Set
3373    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3374    memory allocation.  Likewise, specify preallocated storage for the
3375    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3376 
3377    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3378    the figure below we depict these three local rows and all columns (0-11).
3379 
3380 .vb
3381            0 1 2 3 4 5 6 7 8 9 10 11
3382           -------------------
3383    row 3  |  o o o d d d o o o o o o
3384    row 4  |  o o o d d d o o o o o o
3385    row 5  |  o o o d d d o o o o o o
3386           -------------------
3387 .ve
3388 
3389    Thus, any entries in the d locations are stored in the d (diagonal)
3390    submatrix, and any entries in the o locations are stored in the
3391    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3392    stored simply in the MATSEQBAIJ format for compressed row storage.
3393 
3394    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3395    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3396    In general, for PDE problems in which most nonzeros are near the diagonal,
3397    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3398    or you will get TERRIBLE performance; see the users' manual chapter on
3399    matrices.
3400 
3401    You can call MatGetInfo() to get information on how effective the preallocation was;
3402    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3403    You can also run with the option -info and look for messages with the string
3404    malloc in them to see if additional memory allocation was needed.
3405 
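   Example (a minimal sketch for the three local rows pictured above, taking bs=1 so that each
   depicted row is one block row; the counts simply read off the figure and error checking is omitted):
.vb
   PetscInt d_nnz[] = {3,3,3};   /* three d blocks per local block row (columns 3-5) */
   PetscInt o_nnz[] = {9,9,9};   /* nine o blocks per local block row */
   MatCreate(comm,&A);
   MatSetSizes(A,3,3,12,12);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve
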
3406    Level: intermediate
3407 
3408 .keywords: matrix, block, aij, compressed row, sparse, parallel
3409 
3410 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR()
3411 @*/
3412 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3413 {
3414   PetscErrorCode ierr;
3415 
3416   PetscFunctionBegin;
3417   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3418   PetscValidType(B,1);
3419   PetscValidLogicalCollectiveInt(B,bs,2);
3420   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatCreateBAIJ"
3426 /*@C
3427    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3428    (block compressed row).  For good matrix assembly performance
3429    the user should preallocate the matrix storage by setting the parameters
3430    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3431    performance can be increased by more than a factor of 50.
3432 
3433    Collective on MPI_Comm
3434 
3435    Input Parameters:
3436 +  comm - MPI communicator
3437 .  bs   - size of block
3438 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3439            This value should be the same as the local size used in creating the
3440            y vector for the matrix-vector product y = Ax.
3441 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3442            This value should be the same as the local size used in creating the
3443            x vector for the matrix-vector product y = Ax.
3444 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3445 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3446 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3447            submatrix  (same for all local rows)
3448 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3449            of the diagonal portion of the local submatrix (possibly different for each block
3450            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3451            and set it even if it is zero.
3452 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3453            submatrix (same for all local rows).
3454 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3455            off-diagonal portion of the local submatrix (possibly different for
3456            each block row) or PETSC_NULL.
3457 
3458    Output Parameter:
3459 .  A - the matrix
3460 
3461    Options Database Keys:
3462 +   -mat_block_size - size of the blocks to use
3463 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3464 
3465    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3466    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3467    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3468 
3469    Notes:
3470    If the *_nnz parameter is given then the *_nz parameter is ignored
3471 
3472    A nonzero block is any block that has 1 or more nonzeros in it
3473 
3474    The user MUST specify either the local or global matrix dimensions
3475    (possibly both).
3476 
3477    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3478    then it must be used on all processors that share the object for that argument.
3479 
3480    Storage Information:
3481    For a square global matrix we define each processor's diagonal portion
3482    to be its local rows and the corresponding columns (a square submatrix);
3483    each processor's off-diagonal portion encompasses the remainder of the
3484    local matrix (a rectangular submatrix).
3485 
3486    The user can specify preallocated storage for the diagonal part of
3487    the local submatrix with either d_nz or d_nnz (not both).  Set
3488    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3489    memory allocation.  Likewise, specify preallocated storage for the
3490    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3491 
3492    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3493    the figure below we depict these three local rows and all columns (0-11).
3494 
3495 .vb
3496            0 1 2 3 4 5 6 7 8 9 10 11
3497           -------------------
3498    row 3  |  o o o d d d o o o o o o
3499    row 4  |  o o o d d d o o o o o o
3500    row 5  |  o o o d d d o o o o o o
3501           -------------------
3502 .ve
3503 
3504    Thus, any entries in the d locations are stored in the d (diagonal)
3505    submatrix, and any entries in the o locations are stored in the
3506    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3507    stored simply in the MATSEQBAIJ format for compressed row storage.
3508 
3509    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3510    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3511    In general, for PDE problems in which most nonzeros are near the diagonal,
3512    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3513    or you will get TERRIBLE performance; see the users' manual chapter on
3514    matrices.
3515 
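   Example (a sketch only; M and N are placeholders that must be divisible by the block size of 2
   used here, the preallocation counts are illustrative, and error checking is omitted):
.vb
   Mat A;
   MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
   /* ... insert entries with MatSetValuesBlocked() or MatSetValues() ... */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
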
3516    Level: intermediate
3517 
3518 .keywords: matrix, block, aij, compressed row, sparse, parallel
3519 
3520 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3521 @*/
3522 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3523 {
3524   PetscErrorCode ierr;
3525   PetscMPIInt    size;
3526 
3527   PetscFunctionBegin;
3528   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3529   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3531   if (size > 1) {
3532     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3533     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3534   } else {
3535     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3536     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3537   }
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3543 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3544 {
3545   Mat            mat;
3546   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3547   PetscErrorCode ierr;
3548   PetscInt       len=0;
3549 
3550   PetscFunctionBegin;
3551   *newmat       = 0;
3552   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3553   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3554   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3555   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3556 
3557   mat->factortype   = matin->factortype;
3558   mat->preallocated = PETSC_TRUE;
3559   mat->assembled    = PETSC_TRUE;
3560   mat->insertmode   = NOT_SET_VALUES;
3561 
3562   a      = (Mat_MPIBAIJ*)mat->data;
3563   mat->rmap->bs  = matin->rmap->bs;
3564   a->bs2   = oldmat->bs2;
3565   a->mbs   = oldmat->mbs;
3566   a->nbs   = oldmat->nbs;
3567   a->Mbs   = oldmat->Mbs;
3568   a->Nbs   = oldmat->Nbs;
3569 
3570   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3571   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3572 
3573   a->size         = oldmat->size;
3574   a->rank         = oldmat->rank;
3575   a->donotstash   = oldmat->donotstash;
3576   a->roworiented  = oldmat->roworiented;
3577   a->rowindices   = 0;
3578   a->rowvalues    = 0;
3579   a->getrowactive = PETSC_FALSE;
3580   a->barray       = 0;
3581   a->rstartbs     = oldmat->rstartbs;
3582   a->rendbs       = oldmat->rendbs;
3583   a->cstartbs     = oldmat->cstartbs;
3584   a->cendbs       = oldmat->cendbs;
3585 
3586   /* hash table stuff */
3587   a->ht           = 0;
3588   a->hd           = 0;
3589   a->ht_size      = 0;
3590   a->ht_flag      = oldmat->ht_flag;
3591   a->ht_fact      = oldmat->ht_fact;
3592   a->ht_total_ct  = 0;
3593   a->ht_insert_ct = 0;
3594 
3595   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3596   if (oldmat->colmap) {
3597 #if defined (PETSC_USE_CTABLE)
3598   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3599 #else
3600   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3601   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3602   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3603 #endif
3604   } else a->colmap = 0;
3605 
3606   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3607     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3608     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3609     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3610   } else a->garray = 0;
3611 
3612   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3613   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3614   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3615   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3616   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3617 
3618   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3619   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3620   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3621   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3622   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3623   *newmat = mat;
3624 
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 #undef __FUNCT__
3629 #define __FUNCT__ "MatLoad_MPIBAIJ"
3630 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3631 {
3632   PetscErrorCode ierr;
3633   int            fd;
3634   PetscInt       i,nz,j,rstart,rend;
3635   PetscScalar    *vals,*buf;
3636   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3637   MPI_Status     status;
3638   PetscMPIInt    rank,size,maxnz;
3639   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3640   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3641   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3642   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3643   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3644   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3645 
3646   PetscFunctionBegin;
3647   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3648     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3649   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3650 
3651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3652   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3653   if (!rank) {
3654     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3655     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3656     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3657   }
3658 
3659   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3660 
3661   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3662   M = header[1]; N = header[2];
3663 
3664   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3665   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3666   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3667 
3668   /* If global sizes are set, check if they are consistent with that given in the file */
3669   if (sizesset) {
3670     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3671   }
3672   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3673   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3674 
3675   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3676 
3677   /*
3678      This code adds extra rows to make sure the number of rows is
3679      divisible by the blocksize
3680   */
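  /* Illustrative example: M=10, bs=4 gives Mbs=2 and extra_rows=4-10+8=2 (not equal to bs),
     so Mbs is bumped to 3 and two padded rows (each given a 1.0 on the diagonal below) are appended. */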
3681   Mbs        = M/bs;
3682   extra_rows = bs - M + bs*Mbs;
3683   if (extra_rows == bs) extra_rows = 0;
3684   else                  Mbs++;
3685   if (extra_rows && !rank) {
3686     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3687   }
3688 
3689   /* determine ownership of all rows */
3690   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3691     mbs        = Mbs/size + ((Mbs % size) > rank);
3692     m          = mbs*bs;
3693   } else { /* User set */
3694     m          = newmat->rmap->n;
3695     mbs        = m/bs;
3696   }
3697   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3698   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3699 
3700   /* process 0 needs enough room for process with most rows */
3701   if (!rank) {
3702     mmax = rowners[1];
3703     for (i=2; i<=size; i++) {
3704       mmax = PetscMax(mmax,rowners[i]);
3705     }
3706     mmax*=bs;
3707   } else mmax = m;
3708 
3709   rowners[0] = 0;
3710   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3711   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3712   rstart = rowners[rank];
3713   rend   = rowners[rank+1];
3714 
3715   /* distribute row lengths to all processors */
3716   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3717   if (!rank) {
3718     mend = m;
3719     if (size == 1) mend = mend - extra_rows;
3720     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3721     for (j=mend; j<m; j++) locrowlens[j] = 1;
3722     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3723     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3724     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3725     for (j=0; j<m; j++) {
3726       procsnz[0] += locrowlens[j];
3727     }
3728     for (i=1; i<size; i++) {
3729       mend = browners[i+1] - browners[i];
3730       if (i == size-1) mend = mend - extra_rows;
3731       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3732       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3733       /* calculate the number of nonzeros on each processor */
3734       for (j=0; j<browners[i+1]-browners[i]; j++) {
3735         procsnz[i] += rowlengths[j];
3736       }
3737       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3738     }
3739     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3740   } else {
3741     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3742   }
3743 
3744   if (!rank) {
3745     /* determine max buffer needed and allocate it */
3746     maxnz = procsnz[0];
3747     for (i=1; i<size; i++) {
3748       maxnz = PetscMax(maxnz,procsnz[i]);
3749     }
3750     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3751 
3752     /* read in my part of the matrix column indices  */
3753     nz     = procsnz[0];
3754     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3755     mycols = ibuf;
3756     if (size == 1)  nz -= extra_rows;
3757     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3758     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3759 
3760     /* read in the column indices for the other processes (except the last) and ship them off */
3761     for (i=1; i<size-1; i++) {
3762       nz   = procsnz[i];
3763       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3764       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3765     }
3766     /* read in the stuff for the last proc */
3767     if (size != 1) {
3768       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3769       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3770       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3771       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3772     }
3773     ierr = PetscFree(cols);CHKERRQ(ierr);
3774   } else {
3775     /* determine buffer space needed for message */
3776     nz = 0;
3777     for (i=0; i<m; i++) {
3778       nz += locrowlens[i];
3779     }
3780     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3781     mycols = ibuf;
3782     /* receive message of column indices*/
3783     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3784     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3785     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3786   }
3787 
3788   /* loop over local rows, determining number of off diagonal entries */
3789   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3790   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3791   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3792   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3793   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3794   rowcount = 0; nzcount = 0;
3795   for (i=0; i<mbs; i++) {
3796     dcount  = 0;
3797     odcount = 0;
3798     for (j=0; j<bs; j++) {
3799       kmax = locrowlens[rowcount];
3800       for (k=0; k<kmax; k++) {
3801         tmp = mycols[nzcount++]/bs;
3802         if (!mask[tmp]) {
3803           mask[tmp] = 1;
3804           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3805           else masked1[dcount++] = tmp;
3806         }
3807       }
3808       rowcount++;
3809     }
3810 
3811     dlens[i]  = dcount;
3812     odlens[i] = odcount;
3813 
3814     /* zero out the mask elements we set */
3815     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3816     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3817   }
3818 
3819 
3820   if (!sizesset) {
3821     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3822   }
3823   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3824 
3825   if (!rank) {
3826     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3827     /* read in my part of the matrix numerical values  */
3828     nz = procsnz[0];
3829     vals = buf;
3830     mycols = ibuf;
3831     if (size == 1)  nz -= extra_rows;
3832     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3833     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3834 
3835     /* insert into matrix */
3836     jj      = rstart*bs;
3837     for (i=0; i<m; i++) {
3838       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3839       mycols += locrowlens[i];
3840       vals   += locrowlens[i];
3841       jj++;
3842     }
3843     /* read in the values for the other processes (except the last one) and ship them out */
3844     for (i=1; i<size-1; i++) {
3845       nz   = procsnz[i];
3846       vals = buf;
3847       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3848       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3849     }
3850     /* the last proc */
3851     if (size != 1){
3852       nz   = procsnz[i] - extra_rows;
3853       vals = buf;
3854       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3855       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3856       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3857     }
3858     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3859   } else {
3860     /* receive numeric values */
3861     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3862 
3863     /* receive message of values*/
3864     vals   = buf;
3865     mycols = ibuf;
3866     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3867     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3868     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3869 
3870     /* insert into matrix */
3871     jj      = rstart*bs;
3872     for (i=0; i<m; i++) {
3873       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3874       mycols += locrowlens[i];
3875       vals   += locrowlens[i];
3876       jj++;
3877     }
3878   }
3879   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3880   ierr = PetscFree(buf);CHKERRQ(ierr);
3881   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3882   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3883   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3884   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3885   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3887 
3888   PetscFunctionReturn(0);
3889 }
3890 
3891 #undef __FUNCT__
3892 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3893 /*@
3894    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3895 
3896    Input Parameters:
3897 +  mat  - the matrix
3898 -  fact - factor
3899 
3900    Not Collective, each process can use a different factor
3901 
3902    Level: advanced
3903 
3904   Notes:
3905    This can also be set by the command line option: -mat_use_hash_table <fact>
3906 
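   Example (a sketch; 1.39 is simply the default factor used elsewhere in this file):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve
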
3907 .keywords: matrix, hashtable, factor, HT
3908 
3909 .seealso: MatSetOption()
3910 @*/
3911 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3912 {
3913   PetscErrorCode ierr;
3914 
3915   PetscFunctionBegin;
3916   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3917   PetscFunctionReturn(0);
3918 }
3919 
3920 EXTERN_C_BEGIN
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3923 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3924 {
3925   Mat_MPIBAIJ *baij;
3926 
3927   PetscFunctionBegin;
3928   baij = (Mat_MPIBAIJ*)mat->data;
3929   baij->ht_fact = fact;
3930   PetscFunctionReturn(0);
3931 }
3932 EXTERN_C_END
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3936 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3937 {
3938   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3939   PetscFunctionBegin;
3940   *Ad     = a->A;
3941   *Ao     = a->B;
3942   *colmap = a->garray;
3943   PetscFunctionReturn(0);
3944 }
3945 
3946 /*
3947     Special version for direct calls from Fortran (to eliminate two function call overheads)
3948 */
3949 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3950 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3951 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3952 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3953 #endif
3954 
3955 #undef __FUNCT__
3956 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3957 /*@C
3958   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3959 
3960   Collective on Mat
3961 
3962   Input Parameters:
3963 + mat - the matrix
3964 . min - number of input rows
3965 . im - input rows
3966 . nin - number of input columns
3967 . in - input columns
3968 . v - numerical values input
3969 - addvin - INSERT_VALUES or ADD_VALUES
3970 
3971   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3972 
3973   Level: advanced
3974 
3975 .seealso:   MatSetValuesBlocked()
3976 @*/
3977 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3978 {
3979   /* convert input arguments to C version */
3980   Mat             mat = *matin;
3981   PetscInt        m = *min, n = *nin;
3982   InsertMode      addv = *addvin;
3983 
3984   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3985   const MatScalar *value;
3986   MatScalar       *barray=baij->barray;
3987   PetscBool       roworiented = baij->roworiented;
3988   PetscErrorCode  ierr;
3989   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3990   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3991   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3992 
3993   PetscFunctionBegin;
3994   /* tasks normally handled by MatSetValuesBlocked() */
3995   if (mat->insertmode == NOT_SET_VALUES) {
3996     mat->insertmode = addv;
3997   }
3998 #if defined(PETSC_USE_DEBUG)
3999   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4000   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4001 #endif
4002   if (mat->assembled) {
4003     mat->was_assembled = PETSC_TRUE;
4004     mat->assembled     = PETSC_FALSE;
4005   }
4006   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4007 
4008 
4009   if (!barray) {
4010     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4011     baij->barray = barray;
4012   }
4013 
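  /* Within the logically dense m*bs by n*bs input array v, stepval is the number of scalars to
     skip to get from the end of one bs-long row (or column) of a block to the start of the next
     row (or column) of that same block; it is used below when a block has to be gathered into
     the contiguous work array barray. */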
4014   if (roworiented) {
4015     stepval = (n-1)*bs;
4016   } else {
4017     stepval = (m-1)*bs;
4018   }
4019   for (i=0; i<m; i++) {
4020     if (im[i] < 0) continue;
4021 #if defined(PETSC_USE_DEBUG)
4022     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4023 #endif
4024     if (im[i] >= rstart && im[i] < rend) {
4025       row = im[i] - rstart;
4026       for (j=0; j<n; j++) {
4027         /* If there is only a single block column (row oriented) or a single block row (column oriented) the input block is already contiguous and no copy is required */
4028         if ((roworiented) && (n == 1)) {
4029           barray = (MatScalar*)v + i*bs2;
4030         } else if((!roworiented) && (m == 1)) {
4031           barray = (MatScalar*)v + j*bs2;
4032         } else { /* Here a copy is required */
4033           if (roworiented) {
4034             value = v + i*(stepval+bs)*bs + j*bs;
4035           } else {
4036             value = v + j*(stepval+bs)*bs + i*bs;
4037           }
4038           for (ii=0; ii<bs; ii++,value+=stepval) {
4039             for (jj=0; jj<bs; jj++) {
4040               *barray++  = *value++;
4041             }
4042           }
4043           barray -=bs2;
4044         }
4045 
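        /* Block columns in [cstart,cend) belong to the diagonal part baij->A; negative column
           indices are skipped; every other valid column goes into the off-diagonal part baij->B,
           after the global block column is translated to its local position through the colmap
           (which is built on demand, with the matrix disassembled when necessary). */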
4046         if (in[j] >= cstart && in[j] < cend){
4047           col  = in[j] - cstart;
4048           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4049         }
4050         else if (in[j] < 0) continue;
4051 #if defined(PETSC_USE_DEBUG)
4052         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4053 #endif
4054         else {
4055           if (mat->was_assembled) {
4056             if (!baij->colmap) {
4057               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4058             }
4059 
4060 #if defined(PETSC_USE_DEBUG)
4061 #if defined (PETSC_USE_CTABLE)
4062             { PetscInt data;
4063               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4064               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4065             }
4066 #else
4067             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4068 #endif
4069 #endif
4070 #if defined (PETSC_USE_CTABLE)
4071             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4072             col  = (col - 1)/bs;
4073 #else
4074             col = (baij->colmap[in[j]] - 1)/bs;
4075 #endif
4076             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4077               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4078               col =  in[j];
4079             }
4080           }
4081           else col = in[j];
4082           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4083         }
4084       }
4085     } else {
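      /* the block row belongs to another process: unless stashing has been turned off, the values
         are stashed here and sent to the owning process during MatAssemblyBegin()/MatAssemblyEnd() */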
4086       if (!baij->donotstash) {
4087         if (roworiented) {
4088           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4089         } else {
4090           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4091         }
4092       }
4093     }
4094   }
4095 
4096   /* task normally handled by MatSetValuesBlocked() */
4097   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4098   PetscFunctionReturn(0);
4099 }
4100 
4101 #undef __FUNCT__
4102 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4103 /*@
4104      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
4105          rows in standard CSR format.
4106 
4107    Collective on MPI_Comm
4108 
4109    Input Parameters:
4110 +  comm - MPI communicator
4111 .  bs - the block size, only a block size of 1 is supported
4112 .  m - number of local rows (Cannot be PETSC_DECIDE)
4113 .  n - number of local columns. This value should be the same as the local size used in creating the
4114        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4115        calculated if N is given). For square matrices n is almost always m.
4116 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4117 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4118 .   i - row indices
4119 .   j - column indices
4120 -   a - matrix values
4121 
4122    Output Parameter:
4123 .   mat - the matrix
4124 
4125    Level: intermediate
4126 
4127    Notes:
4128        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4129      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4130      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4131 
4132        The i and j indices are 0 based; the values in i are offsets into the local j array marking where each local row starts. A small usage sketch is given in the comment following this routine.
4133 
4134 .keywords: matrix, baij, block, compressed row, sparse, parallel
4135 
4136 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4137           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4138 @*/
4139 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4140 {
4141   PetscErrorCode ierr;
4142 
4143 
4144   PetscFunctionBegin;
4145   if (i[0]) {
4146     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4147   }
4148   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4149   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4150   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4151   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4152   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4153   PetscFunctionReturn(0);
4154 }
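
/*
   A minimal usage sketch for MatCreateMPIBAIJWithArrays() (illustrative only; the arrays are made
   up for the example): each process passes two local rows, in CSR form, of a matrix with four
   global columns, using block size 1.

       Mat            A;
       PetscErrorCode ierr;
       PetscInt       i[] = {0,2,4};
       PetscInt       j[] = {0,1,2,3};
       PetscScalar    a[] = {1.0,2.0,3.0,4.0};

       ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);CHKERRQ(ierr);

   Here i[] holds the offsets into j[] and a[] at which each local row starts, and its final entry
   is the total number of local nonzeros; because the routine copies the arrays, i[], j[] and a[]
   may be freed or reused immediately after the call.
*/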
4155