xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2ce3a92b0ff1f4a5ead6970fba7dea0d46470f06)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non scalable way since the
81    length of colmap equals the global matrix length.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         if (roworiented) {
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
263         } else {
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
275 {
276   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
277   const PetscScalar *value;
278   MatScalar         *barray=baij->barray;
279   PetscBool         roworiented = baij->roworiented;
280   PetscErrorCode    ierr;
281   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
282   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
283   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
284 
285   PetscFunctionBegin;
286   if(!barray) {
287     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
288     baij->barray = barray;
289   }
290 
291   if (roworiented) {
292     stepval = (n-1)*bs;
293   } else {
294     stepval = (m-1)*bs;
295   }
296   for (i=0; i<m; i++) {
297     if (im[i] < 0) continue;
298 #if defined(PETSC_USE_DEBUG)
299     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
300 #endif
301     if (im[i] >= rstart && im[i] < rend) {
302       row = im[i] - rstart;
303       for (j=0; j<n; j++) {
304         /* If NumCol = 1 then a copy is not required */
305         if ((roworiented) && (n == 1)) {
306           barray = (MatScalar*)v + i*bs2;
307         } else if((!roworiented) && (m == 1)) {
308           barray = (MatScalar*)v + j*bs2;
309         } else { /* Here a copy is required */
310           if (roworiented) {
311             value = v + (i*(stepval+bs) + j)*bs;
312           } else {
313 	    value = v + (j*(stepval+bs) + i)*bs;
314           }
315           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
316             for (jj=0; jj<bs; jj++) {
317               barray[jj]  = value[jj];
318             }
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend){
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         }
328         else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined (PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined (PETSC_USE_CTABLE)
349 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
355               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col =  in[j];
357             }
358           }
359           else col = in[j];
360           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
361         }
362       }
363     } else {
364       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscBool      roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399   if (v) PetscValidScalarPointer(v,6);
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405       row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up PetscInto the Hash Table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504        if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619  PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = PetscSqrtReal(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
662       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
663       v = amat->a; jj = amat->j;
664       for (i=0; i<amat->nz; i++) {
665         for (j=0; j<bs; j++){
666           col = bs*(cstart + *jj) + j; /* column index */
667           for (row=0; row<bs; row++){
668             tmp[col] += PetscAbsScalar(*v);  v++;
669           }
670         }
671         jj++;
672       }
673       v = bmat->a; jj = bmat->j;
674       for (i=0; i<bmat->nz; i++) {
675         for (j=0; j<bs; j++){
676           col = bs*garray[*jj] + j;
677           for (row=0; row<bs; row++){
678             tmp[col] += PetscAbsScalar(*v); v++;
679           }
680         }
681         jj++;
682       }
683       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
684       *nrm = 0.0;
685       for (j=0; j<mat->cmap->N; j++) {
686         if (tmp2[j] > *nrm) *nrm = tmp2[j];
687       }
688       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY) { /* max row sum */
690       PetscReal *sums;
691       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
692       sum = 0.0;
693       for (j=0; j<amat->mbs; j++) {
694         for (row=0; row<bs; row++) sums[row] = 0.0;
695         v = amat->a + bs2*amat->i[j];
696         nz = amat->i[j+1]-amat->i[j];
697         for (i=0; i<nz; i++) {
698           for (col=0; col<bs; col++){
699             for (row=0; row<bs; row++){
700               sums[row] += PetscAbsScalar(*v); v++;
701             }
702           }
703         }
704         v = bmat->a + bs2*bmat->i[j];
705         nz = bmat->i[j+1]-bmat->i[j];
706         for (i=0; i<nz; i++) {
707           for (col=0; col<bs; col++){
708             for (row=0; row<bs; row++){
709               sums[row] += PetscAbsScalar(*v); v++;
710             }
711           }
712         }
713         for (row=0; row<bs; row++){
714           if (sums[row] > sum) sum = sums[row];
715         }
716       }
717       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
718       ierr = PetscFree(sums);CHKERRQ(ierr);
719     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 /*
725   Creates the hash table, and sets the table
726   This table is created only once.
727   If new entried need to be added to the matrix
728   then the hash table has to be destroyed and
729   recreated.
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
734 {
735   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
736   Mat            A = baij->A,B=baij->B;
737   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
738   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
739   PetscErrorCode ierr;
740   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
741   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
742   PetscInt       *HT,key;
743   MatScalar      **HD;
744   PetscReal      tmp;
745 #if defined(PETSC_USE_INFO)
746   PetscInt       ct=0,max=0;
747 #endif
748 
749   PetscFunctionBegin;
750   if (baij->ht) PetscFunctionReturn(0);
751 
752   baij->ht_size = (PetscInt)(factor*nz);
753   ht_size       = baij->ht_size;
754 
755   /* Allocate Memory for Hash Table */
756   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
757   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
759   HD   = baij->hd;
760   HT   = baij->ht;
761 
762   /* Loop Over A */
763   for (i=0; i<a->mbs; i++) {
764     for (j=ai[i]; j<ai[i+1]; j++) {
765       row = i+rstart;
766       col = aj[j]+cstart;
767 
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++){
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = a->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786   /* Loop Over B */
787   for (i=0; i<b->mbs; i++) {
788     for (j=bi[i]; j<bi[i+1]; j++) {
789       row = i+rstart;
790       col = garray[bj[j]];
791       key = row*Nbs + col + 1;
792       h1  = HASH(ht_size,key,tmp);
793       for (k=0; k<ht_size; k++){
794         if (!HT[(h1+k)%ht_size]) {
795           HT[(h1+k)%ht_size] = key;
796           HD[(h1+k)%ht_size] = b->a + j*bs2;
797           break;
798 #if defined(PETSC_USE_INFO)
799         } else {
800           ct++;
801 #endif
802         }
803       }
804 #if defined(PETSC_USE_INFO)
805       if (k> max) max = k;
806 #endif
807     }
808   }
809 
810   /* Print Summary */
811 #if defined(PETSC_USE_INFO)
812   for (i=0,j=0; i<ht_size; i++) {
813     if (HT[i]) {j++;}
814   }
815   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
816 #endif
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
823 {
824   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
825   PetscErrorCode ierr;
826   PetscInt       nstash,reallocs;
827   InsertMode     addv;
828 
829   PetscFunctionBegin;
830   if (baij->donotstash || mat->nooffprocentries) {
831     PetscFunctionReturn(0);
832   }
833 
834   /* make sure all processors are either in INSERTMODE or ADDMODE */
835   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
836   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837   mat->insertmode = addv; /* in case this processor had no cache */
838 
839   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
840   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
841   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
842   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
851 {
852   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
853   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
854   PetscErrorCode ierr;
855   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856   PetscInt       *row,*col;
857   PetscBool      r1,r2,r3,other_disassembled;
858   MatScalar      *val;
859   InsertMode     addv = mat->insertmode;
860   PetscMPIInt    n;
861 
862   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
863   PetscFunctionBegin;
864   if (!baij->donotstash && !mat->nooffprocentries) {
865     while (1) {
866       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
867       if (!flg) break;
868 
869       for (i=0; i<n;) {
870         /* Now identify the consecutive vals belonging to the same row */
871         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
872         if (j < n) ncols = j-i;
873         else       ncols = n-i;
874         /* Now assemble all these values with a single function call */
875         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
876         i = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
880     /* Now process the block-stash. Since the values are stashed column-oriented,
881        set the roworiented flag to column oriented, and after MatSetValues()
882        restore the original flags */
883     r1 = baij->roworiented;
884     r2 = a->roworiented;
885     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
886     baij->roworiented = PETSC_FALSE;
887     a->roworiented    = PETSC_FALSE;
888     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
889     while (1) {
890       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
891       if (!flg) break;
892 
893       for (i=0; i<n;) {
894         /* Now identify the consecutive vals belonging to the same row */
895         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
896         if (j < n) ncols = j-i;
897         else       ncols = n-i;
898         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
899         i = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
903     baij->roworiented = r1;
904     a->roworiented    = r2;
905     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
906   }
907 
908   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
910 
911   /* determine if any processor has disassembled, if so we must
912      also disassemble ourselfs, in order that we may reassemble. */
913   /*
914      if nonzero structure of submatrix B cannot change then we know that
915      no processor disassembled thus we can skip this stuff
916   */
917   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
918     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
919     if (mat->was_assembled && !other_disassembled) {
920       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
921     }
922   }
923 
924   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
925     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
926   }
927   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
930 
931 #if defined(PETSC_USE_INFO)
932   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
933     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
934     baij->ht_total_ct  = 0;
935     baij->ht_insert_ct = 0;
936   }
937 #endif
938   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
940     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
941     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
942   }
943 
944   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
945   baij->rowvalues = 0;
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952 {
953   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       size = baij->size,rank = baij->rank;
956   PetscInt          bs = mat->rmap->bs;
957   PetscBool         iascii,isdraw;
958   PetscViewer       sviewer;
959   PetscViewerFormat format;
960 
961   PetscFunctionBegin;
962   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
964   if (iascii) {
965     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
966     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967       MatInfo info;
968       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
969       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
973       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1061       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(&A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084   PetscInt       message_count,flowcontrolcount;
1085 
1086   PetscFunctionBegin;
1087   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1088   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1089   nz   = bs2*(A->nz + B->nz);
1090   rlen = mat->rmap->n;
1091   if (!rank) {
1092     header[0] = MAT_FILE_CLASSID;
1093     header[1] = mat->rmap->N;
1094     header[2] = mat->cmap->N;
1095     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1097     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1098     /* get largest number of rows any processor has */
1099     range = mat->rmap->range;
1100     for (i=1; i<size; i++) {
1101       rlen = PetscMax(rlen,range[i+1] - range[i]);
1102     }
1103   } else {
1104     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1105   }
1106 
1107   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1108   /* compute lengths of each row  */
1109   for (i=0; i<a->mbs; i++) {
1110     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1111   }
1112   /* store the row lengths to the file */
1113   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1114   if (!rank) {
1115     MPI_Status status;
1116     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1117     rlen  = (range[1] - range[0])/bs;
1118     for (i=0; i<rlen; i++) {
1119       for (j=0; j<bs; j++) {
1120         row_lens[i*bs+j] = bs*crow_lens[i];
1121       }
1122     }
1123     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1124     for (i=1; i<size; i++) {
1125       rlen = (range[i+1] - range[i])/bs;
1126       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1127       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1128       for (k=0; k<rlen; k++) {
1129 	for (j=0; j<bs; j++) {
1130 	  row_lens[k*bs+j] = bs*crow_lens[k];
1131 	}
1132       }
1133       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1134     }
1135     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1136     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1137   } else {
1138     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1139     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1140     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1141   }
1142   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1143 
1144   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1145      information needed to make it for each row from a block row. This does require more communication but still not more than
1146      the communication needed for the nonzero values  */
1147   nzmax = nz; /*  space a largest processor needs */
1148   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1149   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1150   cnt  = 0;
1151   for (i=0; i<a->mbs; i++) {
1152     pcnt = cnt;
1153     for (j=B->i[i]; j<B->i[i+1]; j++) {
1154       if ( (col = garray[B->j[j]]) > cstart) break;
1155       for (l=0; l<bs; l++) {
1156 	column_indices[cnt++] = bs*col+l;
1157       }
1158     }
1159     for (k=A->i[i]; k<A->i[i+1]; k++) {
1160       for (l=0; l<bs; l++) {
1161         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1162       }
1163     }
1164     for (; j<B->i[i+1]; j++) {
1165       for (l=0; l<bs; l++) {
1166         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1167       }
1168     }
1169     len = cnt - pcnt;
1170     for (k=1; k<bs; k++) {
1171       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1172       cnt += len;
1173     }
1174   }
1175   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1176 
1177   /* store the columns to the file */
1178   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1179   if (!rank) {
1180     MPI_Status status;
1181     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1184       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1191     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1194   }
1195   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1196 
1197   /* load up the numerical values */
1198   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1199   cnt = 0;
1200   for (i=0; i<a->mbs; i++) {
1201     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ( garray[B->j[j]] > cstart) break;
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     for (k=A->i[i]; k<A->i[i+1]; k++) {
1212       for (l=0; l<bs; l++) {
1213         for (ll=0; ll<bs; ll++) {
1214           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1215         }
1216       }
1217       cnt += bs;
1218     }
1219     for (; j<B->i[i+1]; j++) {
1220       for (l=0; l<bs; l++) {
1221         for (ll=0; ll<bs; ll++) {
1222 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1223         }
1224       }
1225       cnt += bs;
1226     }
1227     cnt += (bs-1)*rlen;
1228   }
1229   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1230 
1231   /* store the column values to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1241     }
1242     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1243   } else {
1244     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1245     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(column_values);CHKERRQ(ierr);
1250 
1251   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1252   if (file) {
1253     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatView_MPIBAIJ"
1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1261 {
1262   PetscErrorCode ierr;
1263   PetscBool      iascii,isdraw,issocket,isbinary;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1270   if (iascii || isdraw || issocket) {
1271     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1272   } else if (isbinary) {
1273     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1274   } else {
1275     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1282 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1283 {
1284   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288 #if defined(PETSC_USE_LOG)
1289   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1290 #endif
1291   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1292   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1293   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1294   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1295 #if defined (PETSC_USE_CTABLE)
1296   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1297 #else
1298   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1299 #endif
1300   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1301   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1302   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1303   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1304   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1305   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1306   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1307   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1308 
1309   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatMult_MPIBAIJ"
1324 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1325 {
1326   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328   PetscInt       nt;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1332   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1333   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1334   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
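  /* y = A*x(local) + B*x(ghost): start the scatter of the ghost values needed by the
     off-diagonal part B, overlap it with the diagonal-block multiply, then finish the
     scatter and add in the off-diagonal contribution */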
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1344 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1345 {
1346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348 
1349   PetscFunctionBegin;
1350   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1351   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1352   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1353   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1359 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1360 {
1361   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1362   PetscErrorCode ierr;
1363   PetscBool      merged;
1364 
1365   PetscFunctionBegin;
1366   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1367   /* do nondiagonal part */
1368   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1369   if (!merged) {
1370     /* send it on its way */
1371     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1372     /* do local part */
1373     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1374     /* receive remote parts: note this assumes the values are not actually */
1375     /* inserted in yy until the next line */
1376     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377   } else {
1378     /* do local part */
1379     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1380     /* send it on its way */
1381     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382     /* values actually were received in the Begin() but we need to call this nop */
1383     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1390 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1391 {
1392   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   /* do nondiagonal part */
1397   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1398   /* send it on its way */
1399   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1400   /* do local part */
1401   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1402   /* receive remote parts: note this assumes the values are not actually */
1403   /* inserted in zz until the next line, which is true for this implementation */
1404   /* but may not hold for every scatter implementation */
1405   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 /*
1410   This only works correctly for square matrices where the subblock A->A is the
1411    diagonal block
1412 */
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1415 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1422   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatScale_MPIBAIJ"
1428 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1429 {
1430   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1431   PetscErrorCode ierr;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1435   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1441 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1442 {
1443   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1444   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1445   PetscErrorCode ierr;
1446   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1447   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1448   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1449 
1450   PetscFunctionBegin;
1451   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1452   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1453   mat->getrowactive = PETSC_TRUE;
1454 
1455   if (!mat->rowvalues && (idx || v)) {
1456     /*
1457         allocate enough space to hold information from the longest row.
1458     */
1459     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1460     PetscInt     max = 1,mbs = mat->mbs,tmp;
1461     for (i=0; i<mbs; i++) {
1462       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1463       if (max < tmp) { max = tmp; }
1464     }
1465     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1466   }
1467   lrow = row - brstart;
1468 
1469   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1470   if (!v)   {pvA = 0; pvB = 0;}
1471   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1472   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1473   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1474   nztot = nzA + nzB;
1475 
1476   cmap  = mat->garray;
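  /* garray maps the compressed (local) block-column indices of B back to global block
     columns; B entries whose global block column is below cstart precede the diagonal
     block (A) entries in the merged row, the remaining B entries follow them */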
1477   if (v  || idx) {
1478     if (nztot) {
1479       /* Sort by increasing column numbers, assuming A and B already sorted */
1480       PetscInt imark = -1;
1481       if (v) {
1482         *v = v_p = mat->rowvalues;
1483         for (i=0; i<nzB; i++) {
1484           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1485           else break;
1486         }
1487         imark = i;
1488         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1489         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1490       }
1491       if (idx) {
1492         *idx = idx_p = mat->rowindices;
1493         if (imark > -1) {
1494           for (i=0; i<imark; i++) {
1495             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1496           }
1497         } else {
1498           for (i=0; i<nzB; i++) {
1499             if (cmap[cworkB[i]/bs] < cstart)
1500               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1501             else break;
1502           }
1503           imark = i;
1504         }
1505         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1506         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1507       }
1508     } else {
1509       if (idx) *idx = 0;
1510       if (v)   *v   = 0;
1511     }
1512   }
1513   *nz = nztot;
1514   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1515   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1533 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1534 {
1535   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1540   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1546 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1547 {
1548   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1549   Mat            A = a->A,B = a->B;
1550   PetscErrorCode ierr;
1551   PetscReal      isend[5],irecv[5];
1552 
1553   PetscFunctionBegin;
1554   info->block_size     = (PetscReal)matin->rmap->bs;
1555   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1556   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1557   isend[3] = info->memory;  isend[4] = info->mallocs;
1558   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561   if (flag == MAT_LOCAL) {
1562     info->nz_used      = isend[0];
1563     info->nz_allocated = isend[1];
1564     info->nz_unneeded  = isend[2];
1565     info->memory       = isend[3];
1566     info->mallocs      = isend[4];
1567   } else if (flag == MAT_GLOBAL_MAX) {
1568     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else if (flag == MAT_GLOBAL_SUM) {
1575     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else {
1582     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1583   }
1584   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1585   info->fill_ratio_needed = 0;
1586   info->factor_mallocs    = 0;
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1593 {
1594   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   switch (op) {
1599   case MAT_NEW_NONZERO_LOCATIONS:
1600   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1601   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1602   case MAT_KEEP_NONZERO_PATTERN:
1603   case MAT_NEW_NONZERO_LOCATION_ERR:
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     a->roworiented = flg;
1609     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1611     break;
1612   case MAT_NEW_DIAGONALS:
1613     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1614     break;
1615   case MAT_IGNORE_OFF_PROC_ENTRIES:
1616     a->donotstash = flg;
1617     break;
1618   case MAT_USE_HASH_TABLE:
1619     a->ht_flag = flg;
1620     break;
1621   case MAT_SYMMETRIC:
1622   case MAT_STRUCTURALLY_SYMMETRIC:
1623   case MAT_HERMITIAN:
1624   case MAT_SYMMETRY_ETERNAL:
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626     break;
1627   default:
1628     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1635 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1636 {
1637   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1638   Mat_SeqBAIJ    *Aloc;
1639   Mat            B;
1640   PetscErrorCode ierr;
1641   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1642   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1643   MatScalar      *a;
1644 
1645   PetscFunctionBegin;
1646   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1647   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1648     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1649     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1650     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1651     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1652   } else {
1653     B = *matout;
1654   }
1655 
1656   /* copy over the A part */
1657   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1658   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1659   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1660 
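  /* rvals holds the bs global point rows of block row i, which become column indices in
     the transpose; each bs x bs block is stored column by column, so every stored block
     column is inserted as one row of B at the running column index 'col' */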
1661   for (i=0; i<mbs; i++) {
1662     rvals[0] = bs*(baij->rstartbs + i);
1663     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1664     for (j=ai[i]; j<ai[i+1]; j++) {
1665       col = (baij->cstartbs+aj[j])*bs;
1666       for (k=0; k<bs; k++) {
1667         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1668         col++; a += bs;
1669       }
1670     }
1671   }
1672   /* copy over the B part */
1673   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1674   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1675   for (i=0; i<mbs; i++) {
1676     rvals[0] = bs*(baij->rstartbs + i);
1677     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1678     for (j=ai[i]; j<ai[i+1]; j++) {
1679       col = baij->garray[aj[j]]*bs;
1680       for (k=0; k<bs; k++) {
1681         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1682         col++; a += bs;
1683       }
1684     }
1685   }
1686   ierr = PetscFree(rvals);CHKERRQ(ierr);
1687   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689 
1690   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1691     *matout = B;
1692   } else {
1693     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1694   }
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1700 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1701 {
1702   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1703   Mat            a = baij->A,b = baij->B;
1704   PetscErrorCode ierr;
1705   PetscInt       s1,s2,s3;
1706 
1707   PetscFunctionBegin;
1708   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1709   if (rr) {
1710     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1711     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1712     /* Overlap communication with computation. */
1713     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1714   }
1715   if (ll) {
1716     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1717     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1718     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1719   }
1720   /* scale  the diagonal block */
1721   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1722 
1723   if (rr) {
1724     /* Do a scatter end and then right scale the off-diagonal block */
1725     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1727   }
1728 
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1735 {
1736   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1737   PetscErrorCode    ierr;
1738   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1739   PetscInt          i,*owners = A->rmap->range;
1740   PetscInt          *nprocs,j,idx,nsends,row;
1741   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1742   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1743   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1744   MPI_Comm          comm = ((PetscObject)A)->comm;
1745   MPI_Request       *send_waits,*recv_waits;
1746   MPI_Status        recv_status,*send_status;
1747   const PetscScalar *xx;
1748   PetscScalar       *bb;
1749 #if defined(PETSC_DEBUG)
1750   PetscBool         found = PETSC_FALSE;
1751 #endif
1752 
1753   PetscFunctionBegin;
1754   /*  first count number of contributors to each processor */
1755   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1756   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1757   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
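  /* nprocs[2*j] counts how many of the requested rows live on process j and nprocs[2*j+1]
     flags whether a message must be sent to j; owner[i] records the owning process of
     rows[i]; the search over processes restarts only when the indices are not increasing */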
1758   j = 0;
1759   for (i=0; i<N; i++) {
1760     if (lastidx > (idx = rows[i])) j = 0;
1761     lastidx = idx;
1762     for (; j<size; j++) {
1763       if (idx >= owners[j] && idx < owners[j+1]) {
1764         nprocs[2*j]++;
1765         nprocs[2*j+1] = 1;
1766         owner[i] = j;
1767 #if defined(PETSC_DEBUG)
1768         found = PETSC_TRUE;
1769 #endif
1770         break;
1771       }
1772     }
1773 #if defined(PETSC_DEBUG)
1774     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1775     found = PETSC_FALSE;
1776 #endif
1777   }
1778   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1779 
1780   if (A->nooffproczerorows) {
1781     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1782     nrecvs = nsends;
1783     nmax   = N;
1784   } else {
1785     /* inform other processors of number of messages and max length*/
1786     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1787   }
1788 
1789   /* post receives:   */
1790   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1791   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1792   for (i=0; i<nrecvs; i++) {
1793     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1794   }
1795 
1796   /* do sends:
1797      1) starts[i] gives the starting index in svalues for stuff going to
1798      the ith processor
1799   */
1800   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1801   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1802   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1803   starts[0]  = 0;
1804   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1805   for (i=0; i<N; i++) {
1806     svalues[starts[owner[i]]++] = rows[i];
1807   }
1808 
1809   starts[0] = 0;
1810   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1811   count = 0;
1812   for (i=0; i<size; i++) {
1813     if (nprocs[2*i+1]) {
1814       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1815     }
1816   }
1817   ierr = PetscFree(starts);CHKERRQ(ierr);
1818 
1819   base = owners[rank];
1820 
1821   /*  wait on receives */
1822   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1823   count  = nrecvs;
1824   slen = 0;
1825   while (count) {
1826     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1827     /* unpack receives into our local space */
1828     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1829     source[imdex]  = recv_status.MPI_SOURCE;
1830     lens[imdex]    = n;
1831     slen          += n;
1832     count--;
1833   }
1834   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1835 
1836   /* move the data into the send scatter */
1837   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1838   count = 0;
1839   for (i=0; i<nrecvs; i++) {
1840     values = rvalues + i*nmax;
1841     for (j=0; j<lens[i]; j++) {
1842       lrows[count++] = values[j] - base;
1843     }
1844   }
1845   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1846   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1847   ierr = PetscFree(owner);CHKERRQ(ierr);
1848   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1849 
1850   /* fix right hand side if needed */
1851   if (x && b) {
1852     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1853     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1854     for (i=0; i<slen; i++) {
1855       bb[lrows[i]] = diag*xx[lrows[i]];
1856     }
1857     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1858     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1859   }
1860 
1861   /* actually zap the local rows */
1862   /*
1863         Zero the required rows. If the "diagonal block" of the matrix
1864      is square and the user wishes to set the diagonal, we use separate
1865      code so that MatSetValues() is not called for each diagonal entry,
1866      which would allocate new memory (many mallocs) and slow things down.
1867 
1868   */
1869   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1870   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1871   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1872     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1873   } else if (diag != 0.0) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1875     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1876        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1877     for (i=0; i<slen; i++) {
1878       row  = lrows[i] + rstart_bs;
1879       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1880     }
1881     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1882     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1883   } else {
1884     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1885   }
1886 
1887   ierr = PetscFree(lrows);CHKERRQ(ierr);
1888 
1889   /* wait on sends */
1890   if (nsends) {
1891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1893     ierr = PetscFree(send_status);CHKERRQ(ierr);
1894   }
1895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1896   ierr = PetscFree(svalues);CHKERRQ(ierr);
1897 
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1903 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1904 {
1905   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatEqual_MPIBAIJ"
1917 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1918 {
1919   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1920   Mat            a,b,c,d;
1921   PetscBool      flg;
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   a = matA->A; b = matA->B;
1926   c = matB->A; d = matB->B;
1927 
1928   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1929   if (flg) {
1930     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1931   }
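  /* reduce with a logical AND so that every rank reports the same answer even if only
     some local pieces differ */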
1932   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatCopy_MPIBAIJ"
1938 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1939 {
1940   PetscErrorCode ierr;
1941   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1942   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1943 
1944   PetscFunctionBegin;
1945   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1946   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1947     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1948   } else {
1949     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1950     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1957 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1958 {
1959   PetscErrorCode ierr;
1960 
1961   PetscFunctionBegin;
1962   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1968 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1969 {
1970   PetscErrorCode ierr;
1971   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1972   PetscBLASInt   bnz,one=1;
1973   Mat_SeqBAIJ    *x,*y;
1974 
1975   PetscFunctionBegin;
1976   if (str == SAME_NONZERO_PATTERN) {
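    /* with identical nonzero patterns the block values of X and Y are stored in the same
       order, so Y += a*X reduces to a BLAS axpy on the raw value arrays of the diagonal
       (A) and off-diagonal (B) parts */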
1977     PetscScalar alpha = a;
1978     x = (Mat_SeqBAIJ *)xx->A->data;
1979     y = (Mat_SeqBAIJ *)yy->A->data;
1980     bnz = PetscBLASIntCast(x->nz);
1981     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1982     x = (Mat_SeqBAIJ *)xx->B->data;
1983     y = (Mat_SeqBAIJ *)yy->B->data;
1984     bnz = PetscBLASIntCast(x->nz);
1985     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1986   } else {
1987     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 #undef __FUNCT__
1993 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1994 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1995 {
1996   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1997   PetscInt rbs,cbs;
1998   PetscErrorCode ierr;
1999 
2000   PetscFunctionBegin;
2001   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2002   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2003   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2004   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2005   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2006   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2012 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2013 {
2014   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2019   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2025 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2026 {
2027   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2032   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2038 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2039 {
2040   PetscErrorCode ierr;
2041   IS             iscol_local;
2042   PetscInt       csize;
2043 
2044   PetscFunctionBegin;
2045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2046   if (call == MAT_REUSE_MATRIX) {
2047     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2048     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2049   } else {
2050     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2051   }
2052   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2053   if (call == MAT_INITIAL_MATRIX) {
2054     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2055     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2062 /*
2063     Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2064   then the final result obtained by concatenating the local matrices.
2065   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2066 */
2067 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2068 {
2069   PetscErrorCode ierr;
2070   PetscMPIInt    rank,size;
2071   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2072   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2073   Mat            *local,M,Mreuse;
2074   MatScalar      *vwork,*aa;
2075   MPI_Comm       comm = ((PetscObject)mat)->comm;
2076   Mat_SeqBAIJ    *aij;
2077 
2078 
2079   PetscFunctionBegin;
2080   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2082 
2083   if (call ==  MAT_REUSE_MATRIX) {
2084     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2085     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2086     local = &Mreuse;
2087     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2088   } else {
2089     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2090     Mreuse = *local;
2091     ierr   = PetscFree(local);CHKERRQ(ierr);
2092   }
2093 
2094   /*
2095       m - number of local rows
2096       n - number of columns (same on all processors)
2097       rstart - first row in new global matrix generated
2098   */
2099   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2100   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2101   m    = m/bs;
2102   n    = n/bs;
2103 
2104   if (call == MAT_INITIAL_MATRIX) {
2105     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2106     ii  = aij->i;
2107     jj  = aij->j;
2108 
2109     /*
2110         Determine the number of non-zeros in the diagonal and off-diagonal
2111         portions of the matrix in order to do correct preallocation
2112     */
2113 
2114     /* first get start and end of "diagonal" columns */
2115     if (csize == PETSC_DECIDE) {
2116       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2117       if (mglobal == n*bs) { /* square matrix */
2118         nlocal = m;
2119       } else {
2120         nlocal = n/size + ((n % size) > rank);
2121       }
2122     } else {
2123       nlocal = csize/bs;
2124     }
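    /* MPI_Scan gives the inclusive prefix sum of nlocal over the ranks, so rend is the end
       of this process's block-column ownership range and rstart = rend - nlocal its start */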
2125     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2126     rstart = rend - nlocal;
2127     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2128 
2129     /* next, compute all the lengths */
2130     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2131     olens = dlens + m;
2132     for (i=0; i<m; i++) {
2133       jend = ii[i+1] - ii[i];
2134       olen = 0;
2135       dlen = 0;
2136       for (j=0; j<jend; j++) {
2137         if (*jj < rstart || *jj >= rend) olen++;
2138         else dlen++;
2139         jj++;
2140       }
2141       olens[i] = olen;
2142       dlens[i] = dlen;
2143     }
2144     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2145     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2146     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2147     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2148     ierr = PetscFree(dlens);CHKERRQ(ierr);
2149   } else {
2150     PetscInt ml,nl;
2151 
2152     M = *newmat;
2153     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2154     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2155     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2156     /*
2157          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2158        rather than the slower MatSetValues().
2159     */
2160     M->was_assembled = PETSC_TRUE;
2161     M->assembled     = PETSC_FALSE;
2162   }
2163   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2164   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2165   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2166   ii  = aij->i;
2167   jj  = aij->j;
2168   aa  = aij->a;
2169   for (i=0; i<m; i++) {
2170     row   = rstart/bs + i;
2171     nz    = ii[i+1] - ii[i];
2172     cwork = jj;     jj += nz;
2173     vwork = aa;     aa += nz;
2174     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2175   }
2176 
2177   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2179   *newmat = M;
2180 
2181   /* save submatrix used in processor for next request */
2182   if (call ==  MAT_INITIAL_MATRIX) {
2183     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2184     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2185   }
2186 
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatPermute_MPIBAIJ"
2192 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2193 {
2194   MPI_Comm       comm,pcomm;
2195   PetscInt       first,local_size,nrows;
2196   const PetscInt *rows;
2197   PetscMPIInt    size;
2198   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2203   /* make a collective version of 'rowp' */
2204   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2205   if (pcomm==comm) {
2206     crowp = rowp;
2207   } else {
2208     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2209     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2210     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2211     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2212   }
2213   /* collect the global row permutation and invert it */
2214   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2215   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2216   if (pcomm!=comm) {
2217     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2218   }
2219   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2220   /* get the local target indices */
2221   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2223   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2224   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2225   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2226   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2227   /* the column permutation is so much easier;
2228      make a local version of 'colp' and invert it */
2229   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2230   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2231   if (size==1) {
2232     lcolp = colp;
2233   } else {
2234     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2235     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2236     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2237   }
2238   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2239   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2240   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2241   if (size>1) {
2242     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2243     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2244   }
2245   /* now we just get the submatrix */
2246   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2247   /* clean up */
2248   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2249   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2255 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2256 {
2257   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2258   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2259 
2260   PetscFunctionBegin;
2261   if (nghosts) { *nghosts = B->nbs;}
2262   if (ghosts) {*ghosts = baij->garray;}
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2270 /*
2271     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2272 */
2273 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2274 {
2275   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2276   PetscErrorCode        ierr;
2277   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2278   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2279   const PetscInt        *is;
2280   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2281   PetscInt              *rowhit,M,cstart,cend,colb;
2282   PetscInt              *columnsforrow,l;
2283   IS                    *isa;
2284   PetscBool              done,flg;
2285   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2286   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2287 
2288   PetscFunctionBegin;
2289   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2290   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2291 
2292   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2293   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2294   M                = mat->rmap->n/bs;
2295   cstart           = mat->cmap->rstart/bs;
2296   cend             = mat->cmap->rend/bs;
2297   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2298   c->N             = mat->cmap->N/bs;
2299   c->m             = mat->rmap->n/bs;
2300   c->rstart        = mat->rmap->rstart/bs;
2301 
2302   c->ncolors       = nis;
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2307   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2308   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2309 
2310   /* Allow access to data structures of local part of matrix */
2311   if (!baij->colmap) {
2312     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2313   }
2314   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2315   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2316 
2317   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2318   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2319 
2320   for (i=0; i<nis; i++) {
2321     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2322     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2323     c->ncolumns[i] = n;
2324     if (n) {
2325       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2326       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2327       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2328     } else {
2329       c->columns[i]  = 0;
2330     }
2331 
2332     if (ctype == IS_COLORING_GLOBAL){
2333       /* Determine the total (parallel) number of columns of this color */
2334       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2335       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2336 
2337       nn   = PetscMPIIntCast(n);
2338       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2339       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2340       if (!nctot) {
2341         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2342       }
2343 
2344       disp[0] = 0;
2345       for (j=1; j<size; j++) {
2346         disp[j] = disp[j-1] + ncolsonproc[j-1];
2347       }
2348 
2349       /* Get complete list of columns for color on each processor */
2350       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2351       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2352       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2353     } else if (ctype == IS_COLORING_GHOSTED){
2354       /* Determine local number of columns of this color on this process, including ghost points */
2355       nctot = n;
2356       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2357       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2358     } else {
2359       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2360     }
2361 
2362     /*
2363        Mark all rows affected by these columns
2364     */
2365     /* Temporary option to allow for debugging/testing */
2366     flg  = PETSC_FALSE;
2367     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2368     if (!flg) {/*-----------------------------------------------------------------------------*/
2369       /* crude, fast version */
2370       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2371       /* loop over columns*/
2372       for (j=0; j<nctot; j++) {
2373         if (ctype == IS_COLORING_GHOSTED) {
2374           col = ltog[cols[j]];
2375         } else {
2376           col  = cols[j];
2377         }
2378         if (col >= cstart && col < cend) {
2379           /* column is in diagonal block of matrix */
2380           rows = A_cj + A_ci[col-cstart];
2381           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2382         } else {
2383 #if defined (PETSC_USE_CTABLE)
2384           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2385           colb --;
2386 #else
2387           colb = baij->colmap[col] - 1;
2388 #endif
2389           if (colb == -1) {
2390             m = 0;
2391           } else {
2392             colb = colb/bs;
2393             rows = B_cj + B_ci[colb];
2394             m    = B_ci[colb+1] - B_ci[colb];
2395           }
2396         }
2397         /* loop over the rows containing this column, marking them in rowhit */
2398         for (k=0; k<m; k++) {
2399           rowhit[*rows++] = col + 1;
2400         }
2401       }
2402 
2403       /* count the number of hits */
2404       nrows = 0;
2405       for (j=0; j<M; j++) {
2406         if (rowhit[j]) nrows++;
2407       }
2408       c->nrows[i]         = nrows;
2409       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2410       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2411       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2412       nrows = 0;
2413       for (j=0; j<M; j++) {
2414         if (rowhit[j]) {
2415           c->rows[i][nrows]           = j;
2416           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2417           nrows++;
2418         }
2419       }
2420     } else {/*-------------------------------------------------------------------------------*/
2421       /* slow version, using rowhit as a linked list */
2422       PetscInt currentcol,fm,mfm;
2423       rowhit[M] = M;
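      /* rowhit[] doubles as a sorted singly linked list of the rows hit so far: entry M is
         both list head and terminator, each insertion keeps the list in increasing row
         order, and a duplicate row signals an invalid coloring */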
2424       nrows     = 0;
2425       /* loop over columns*/
2426       for (j=0; j<nctot; j++) {
2427         if (ctype == IS_COLORING_GHOSTED) {
2428           col = ltog[cols[j]];
2429         } else {
2430           col  = cols[j];
2431         }
2432         if (col >= cstart && col < cend) {
2433           /* column is in diagonal block of matrix */
2434           rows = A_cj + A_ci[col-cstart];
2435           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2436         } else {
2437 #if defined (PETSC_USE_CTABLE)
2438           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2439           colb --;
2440 #else
2441           colb = baij->colmap[col] - 1;
2442 #endif
2443           if (colb == -1) {
2444             m = 0;
2445           } else {
2446             colb = colb/bs;
2447             rows = B_cj + B_ci[colb];
2448             m    = B_ci[colb+1] - B_ci[colb];
2449           }
2450         }
2451 
2452         /* loop over the rows containing this column, marking them in rowhit */
2453         fm    = M; /* fm points to first entry in linked list */
2454         for (k=0; k<m; k++) {
2455           currentcol = *rows++;
2456           /* is it already in the list? */
2457           do {
2458             mfm  = fm;
2459             fm   = rowhit[fm];
2460           } while (fm < currentcol);
2461           /* not in list so add it */
2462           if (fm != currentcol) {
2463             nrows++;
2464             columnsforrow[currentcol] = col;
2465             /* next three lines insert new entry into linked list */
2466             rowhit[mfm]               = currentcol;
2467             rowhit[currentcol]        = fm;
2468             fm                        = currentcol;
2469             /* fm points to present position in list since we know the columns are sorted */
2470           } else {
2471             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2472           }
2473         }
2474       }
2475       c->nrows[i]         = nrows;
2476       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2477       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2478       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2479       /* now store the linked list of rows into c->rows[i] */
2480       nrows = 0;
2481       fm    = rowhit[M];
2482       do {
2483         c->rows[i][nrows]            = fm;
2484         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2485         fm                           = rowhit[fm];
2486       } while (fm < M);
2487     } /* ---------------------------------------------------------------------------------------*/
2488     ierr = PetscFree(cols);CHKERRQ(ierr);
2489   }
2490 
2491   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2492   /*
2493        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2494   */
2495   if (ctype == IS_COLORING_GLOBAL) {
2496     PetscInt *garray;
2497     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
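    /* expand the block ghost indices in baij->garray into point (scalar) column indices so
       the ghosted vscale vector can address individual columns */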
2498     for (i=0; i<baij->B->cmap->n/bs; i++) {
2499       for (j=0; j<bs; j++) {
2500         garray[i*bs+j] = bs*baij->garray[i]+j;
2501       }
2502     }
2503     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2504     ierr = PetscFree(garray);CHKERRQ(ierr);
2505     CHKMEMQ;
2506     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2507     for (k=0; k<c->ncolors; k++) {
2508       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2509       for (l=0; l<c->nrows[k]; l++) {
2510         col = c->columnsforrow[k][l];
2511         if (col >= cstart && col < cend) {
2512           /* column is in diagonal block of matrix */
2513           colb = col - cstart;
2514         } else {
2515           /* column  is in "off-processor" part */
2516 #if defined (PETSC_USE_CTABLE)
2517           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2518           colb --;
2519 #else
2520           colb = baij->colmap[col] - 1;
2521 #endif
2522           colb = colb/bs;
2523           colb += cend - cstart;
2524         }
2525         c->vscaleforrow[k][l] = colb;
2526       }
2527     }
2528   } else if (ctype == IS_COLORING_GHOSTED) {
2529     /* Get gtol mapping */
2530     PetscInt N = mat->cmap->N, *gtol;
2531     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2532     for (i=0; i<N; i++) gtol[i] = -1;
2533     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2534 
2535     c->vscale = 0; /* will be created in MatFDColoringApply() */
2536     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2537     for (k=0; k<c->ncolors; k++) {
2538       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2539       for (l=0; l<c->nrows[k]; l++) {
2540         col = c->columnsforrow[k][l];      /* global column index */
2541         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2542       }
2543     }
2544     ierr = PetscFree(gtol);CHKERRQ(ierr);
2545   }
2546   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2547 
2548   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2549   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2550   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2551   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2552   CHKMEMQ;
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2559 {
2560   Mat            B;
2561   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2562   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2563   Mat_SeqAIJ     *b;
2564   PetscErrorCode ierr;
2565   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2566   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2567   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2568 
2569   PetscFunctionBegin;
2570   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2571   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2572 
2573   /* ----------------------------------------------------------------
2574      Tell every processor the number of nonzeros per row
2575   */
2576   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2577   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2578     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2579   }
2580   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2581   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2582   displs     = recvcounts + size;
2583   for (i=0; i<size; i++) {
2584     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2585     displs[i]     = A->rmap->range[i]/bs;
2586   }
2587 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2588   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2589 #else
2590   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2591 #endif
2592   /* ---------------------------------------------------------------
2593      Create the sequential matrix of the same type as the local block diagonal
2594   */
2595   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2596   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2597   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2598   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2599   b = (Mat_SeqAIJ *)B->data;
2600 
2601   /*--------------------------------------------------------------------
2602     Copy my part of matrix column indices over
2603   */
2604   sendcount  = ad->nz + bd->nz;
2605   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2606   a_jsendbuf = ad->j;
2607   b_jsendbuf = bd->j;
2608   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2609   cnt        = 0;
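  /* write this rank's column indices directly into its slice of b->j, converting the
     off-diagonal (bd, compressed) and diagonal (ad, local) indices to global block
     numbering so each row comes out sorted: lower off-diagonal, diagonal, upper off-diagonal */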
2610   for (i=0; i<n; i++) {
2611 
2612     /* put in lower diagonal portion */
2613     m = bd->i[i+1] - bd->i[i];
2614     while (m > 0) {
2615       /* is it above diagonal (in bd (compressed) numbering) */
2616       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2617       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2618       m--;
2619     }
2620 
2621     /* put in diagonal portion */
2622     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2623       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2624     }
2625 
2626     /* put in upper diagonal portion */
2627     while (m-- > 0) {
2628       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2629     }
2630   }
2631   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2632 
2633   /*--------------------------------------------------------------------
2634     Gather all column indices to all processors
2635   */
2636   for (i=0; i<size; i++) {
2637     recvcounts[i] = 0;
2638     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2639       recvcounts[i] += lens[j];
2640     }
2641   }
2642   displs[0]  = 0;
2643   for (i=1; i<size; i++) {
2644     displs[i] = displs[i-1] + recvcounts[i-1];
2645   }
2646 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2647   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2648 #else
2649   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2650 #endif
2651   /*--------------------------------------------------------------------
2652     Assemble the matrix into usable form (note numerical values not yet set)
2653   */
2654   /* set the b->ilen (length of each row) values */
2655   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2656   /* set the b->i indices */
2657   b->i[0] = 0;
2658   for (i=1; i<=A->rmap->N/bs; i++) {
2659     b->i[i] = b->i[i-1] + lens[i-1];
2660   }
2661   ierr = PetscFree(lens);CHKERRQ(ierr);
2662   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2663   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2665 
2666   if (A->symmetric){
2667     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2668   } else if (A->hermitian) {
2669     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2670   } else if (A->structurally_symmetric) {
2671     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2672   }
2673   *newmat = B;
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatSOR_MPIBAIJ"
2679 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2680 {
2681   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2682   PetscErrorCode ierr;
2683   Vec            bb1 = 0;
2684 
2685   PetscFunctionBegin;
2686   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2687     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2688   }
2689 
2690   if (flag == SOR_APPLY_UPPER) {
2691     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2692     PetscFunctionReturn(0);
2693   }
2694 
2695   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2696     if (flag & SOR_ZERO_INITIAL_GUESS) {
2697       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2698       its--;
2699     }
2700 
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2729     if (flag & SOR_ZERO_INITIAL_GUESS) {
2730       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2731       its--;
2732     }
2733     while (its--) {
2734       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2735       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2736 
2737       /* update rhs: bb1 = bb - B*x */
2738       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2739       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2740 
2741       /* local sweep */
2742       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2743     }
2744   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel version of SOR not supported");
2745 
2746   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
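
/*
   The local sweeps above are normally reached through the PCSOR preconditioner rather than by
   calling MatSOR() directly.  A minimal usage sketch (assumes a KSP object ksp has already been
   created with this matrix):

      PC pc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/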
2749 
2750 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2754 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2755 {
2756   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2757   PetscErrorCode ierr;
2758 
2759   PetscFunctionBegin;
2760   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 
2765 /* -------------------------------------------------------------------*/
2766 static struct _MatOps MatOps_Values = {
2767        MatSetValues_MPIBAIJ,
2768        MatGetRow_MPIBAIJ,
2769        MatRestoreRow_MPIBAIJ,
2770        MatMult_MPIBAIJ,
2771 /* 4*/ MatMultAdd_MPIBAIJ,
2772        MatMultTranspose_MPIBAIJ,
2773        MatMultTransposeAdd_MPIBAIJ,
2774        0,
2775        0,
2776        0,
2777 /*10*/ 0,
2778        0,
2779        0,
2780        MatSOR_MPIBAIJ,
2781        MatTranspose_MPIBAIJ,
2782 /*15*/ MatGetInfo_MPIBAIJ,
2783        MatEqual_MPIBAIJ,
2784        MatGetDiagonal_MPIBAIJ,
2785        MatDiagonalScale_MPIBAIJ,
2786        MatNorm_MPIBAIJ,
2787 /*20*/ MatAssemblyBegin_MPIBAIJ,
2788        MatAssemblyEnd_MPIBAIJ,
2789        MatSetOption_MPIBAIJ,
2790        MatZeroEntries_MPIBAIJ,
2791 /*24*/ MatZeroRows_MPIBAIJ,
2792        0,
2793        0,
2794        0,
2795        0,
2796 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2797        0,
2798        0,
2799        0,
2800        0,
2801 /*34*/ MatDuplicate_MPIBAIJ,
2802        0,
2803        0,
2804        0,
2805        0,
2806 /*39*/ MatAXPY_MPIBAIJ,
2807        MatGetSubMatrices_MPIBAIJ,
2808        MatIncreaseOverlap_MPIBAIJ,
2809        MatGetValues_MPIBAIJ,
2810        MatCopy_MPIBAIJ,
2811 /*44*/ 0,
2812        MatScale_MPIBAIJ,
2813        0,
2814        0,
2815        0,
2816 /*49*/ MatSetBlockSize_MPIBAIJ,
2817        0,
2818        0,
2819        0,
2820        0,
2821 /*54*/ MatFDColoringCreate_MPIBAIJ,
2822        0,
2823        MatSetUnfactored_MPIBAIJ,
2824        MatPermute_MPIBAIJ,
2825        MatSetValuesBlocked_MPIBAIJ,
2826 /*59*/ MatGetSubMatrix_MPIBAIJ,
2827        MatDestroy_MPIBAIJ,
2828        MatView_MPIBAIJ,
2829        0,
2830        0,
2831 /*64*/ 0,
2832        0,
2833        0,
2834        0,
2835        0,
2836 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2837        0,
2838        0,
2839        0,
2840        0,
2841 /*74*/ 0,
2842        MatFDColoringApply_BAIJ,
2843        0,
2844        0,
2845        0,
2846 /*79*/ 0,
2847        0,
2848        0,
2849        0,
2850        MatLoad_MPIBAIJ,
2851 /*84*/ 0,
2852        0,
2853        0,
2854        0,
2855        0,
2856 /*89*/ 0,
2857        0,
2858        0,
2859        0,
2860        0,
2861 /*94*/ 0,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*99*/ 0,
2867        0,
2868        0,
2869        0,
2870        0,
2871 /*104*/0,
2872        MatRealPart_MPIBAIJ,
2873        MatImaginaryPart_MPIBAIJ,
2874        0,
2875        0,
2876 /*109*/0,
2877        0,
2878        0,
2879        0,
2880        0,
2881 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2882        0,
2883        MatGetGhosts_MPIBAIJ,
2884        0,
2885        0,
2886 /*119*/0,
2887        0,
2888        0,
2889        0,
2890        0,
2891 /*124*/0,
2892        0,
2893        MatInvertBlockDiagonal_MPIBAIJ
2894 };
2895 
2896 EXTERN_C_BEGIN
2897 #undef __FUNCT__
2898 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2899 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2900 {
2901   PetscFunctionBegin;
2902   *a = ((Mat_MPIBAIJ *)A->data)->A;
2903   PetscFunctionReturn(0);
2904 }
2905 EXTERN_C_END
2906 
2907 EXTERN_C_BEGIN
2908 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2909 EXTERN_C_END
2910 
2911 EXTERN_C_BEGIN
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2914 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2915 {
2916   PetscInt       m,rstart,cstart,cend;
2917   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2918   const PetscInt *JJ=0;
2919   PetscScalar    *values=0;
2920   PetscErrorCode ierr;
2921 
2922   PetscFunctionBegin;
2923 
2924   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2925   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2926   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2927   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2928   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2929   m      = B->rmap->n/bs;
2930   rstart = B->rmap->rstart/bs;
2931   cstart = B->cmap->rstart/bs;
2932   cend   = B->cmap->rend/bs;
2933 
2934   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2935   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2936   for (i=0; i<m; i++) {
2937     nz = ii[i+1] - ii[i];
2938     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2939     nz_max = PetscMax(nz_max,nz);
2940     JJ  = jj + ii[i];
2941     for (j=0; j<nz; j++) {
2942       if (*JJ >= cstart) break;
2943       JJ++;
2944     }
2945     d = 0;
2946     for (; j<nz; j++) {
2947       if (*JJ++ >= cend) break;
2948       d++;
2949     }
2950     d_nnz[i] = d;
2951     o_nnz[i] = nz - d;
2952   }
2953   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2954   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2955 
2956   values = (PetscScalar*)V;
2957   if (!values) {
2958     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2959     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2960   }
2961   for (i=0; i<m; i++) {
2962     PetscInt          row    = i + rstart;
2963     PetscInt          ncols  = ii[i+1] - ii[i];
2964     const PetscInt    *icols = jj + ii[i];
2965     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2966     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967   }
2968 
2969   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 EXTERN_C_END
2976 
2977 #undef __FUNCT__
2978 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2979 /*@C
2980    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2981    (block compressed row).
2982 
2983    Collective on MPI_Comm
2984 
2985    Input Parameters:
2986 +  A - the matrix
2987 .  bs - the block size
2988 .  i - the indices into j for the start of each local row (starts with zero)
2989 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2990 -  v - optional values in the matrix
2991 
2992    Level: developer
2993 
2994 .keywords: matrix, aij, compressed row, sparse, parallel
2995 
2996 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2997 @*/
2998 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2999 {
3000   PetscErrorCode ierr;
3001 
3002   PetscFunctionBegin;
3003   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3004   PetscValidType(B,1);
3005   PetscValidLogicalCollectiveInt(B,bs,2);
3006   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
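
/*
   A minimal usage sketch for MatMPIBAIJSetPreallocationCSR(); the block size, counts and column
   indices below are illustrative only, and A is assumed to be an MPIBAIJ matrix whose layout has
   already been set.  ii[] gives the start of each local block row in jj[], jj[] holds the sorted
   global block column indices, and v (here PETSC_NULL) would supply one bs*bs dense block per
   entry of jj:

      PetscInt bs   = 2;
      PetscInt ii[] = {0,2,3};        block row 0 holds 2 blocks, block row 1 holds 1 block
      PetscInt jj[] = {0,2,1};        sorted global block column indices per block row
      ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
*/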
3009 
3010 EXTERN_C_BEGIN
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3013 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3014 {
3015   Mat_MPIBAIJ    *b;
3016   PetscErrorCode ierr;
3017   PetscInt       i, newbs = PetscAbs(bs);
3018 
3019   PetscFunctionBegin;
3020   if (bs < 0) {
3021     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3022       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3023     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3024     bs   = PetscAbs(bs);
3025   }
3026   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3027   bs = newbs;
3028 
3029 
3030   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3031   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3032   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3033   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3034   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3035 
3036   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3037   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3038   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3039   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3040 
3041   if (d_nnz) {
3042     for (i=0; i<B->rmap->n/bs; i++) {
3043       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3044     }
3045   }
3046   if (o_nnz) {
3047     for (i=0; i<B->rmap->n/bs; i++) {
3048       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3049     }
3050   }
3051 
3052   b = (Mat_MPIBAIJ*)B->data;
3053   b->bs2 = bs*bs;
3054   b->mbs = B->rmap->n/bs;
3055   b->nbs = B->cmap->n/bs;
3056   b->Mbs = B->rmap->N/bs;
3057   b->Nbs = B->cmap->N/bs;
3058 
3059   for (i=0; i<=b->size; i++) {
3060     b->rangebs[i] = B->rmap->range[i]/bs;
3061   }
3062   b->rstartbs = B->rmap->rstart/bs;
3063   b->rendbs   = B->rmap->rend/bs;
3064   b->cstartbs = B->cmap->rstart/bs;
3065   b->cendbs   = B->cmap->rend/bs;
3066 
3067   if (!B->preallocated) {
3068     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3069     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3070     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3071     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3072     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3073     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3074     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3075     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3076     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3077   }
3078 
3079   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3080   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3081   B->preallocated = PETSC_TRUE;
3082   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 EXTERN_C_END
3086 
3087 EXTERN_C_BEGIN
3088 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3089 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3090 EXTERN_C_END
3091 
3092 
3093 EXTERN_C_BEGIN
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3096 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3097 {
3098   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3099   PetscErrorCode ierr;
3100   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3101   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3102   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3103 
3104   PetscFunctionBegin;
3105   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3106   ii[0] = 0;
3107   CHKMEMQ;
3108   for (i=0; i<M; i++) {
3109     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3110     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3111     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3112     /* remove one from the count if this row of the matrix has a diagonal entry */
3113     for (j=id[i]; j<id[i+1]; j++) {
3114       if (jd[j] == i) {ii[i+1]--;break;}
3115     }
3116   CHKMEMQ;
3117   }
3118   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3119   cnt = 0;
3120   for (i=0; i<M; i++) {
3121     for (j=io[i]; j<io[i+1]; j++) {
3122       if (garray[jo[j]] > rstart) break;
3123       jj[cnt++] = garray[jo[j]];
3124   CHKMEMQ;
3125     }
3126     for (k=id[i]; k<id[i+1]; k++) {
3127       if (jd[k] != i) {
3128         jj[cnt++] = rstart + jd[k];
3129   CHKMEMQ;
3130       }
3131     }
3132     for (;j<io[i+1]; j++) {
3133       jj[cnt++] = garray[jo[j]];
3134   CHKMEMQ;
3135     }
3136   }
3137   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3138   PetscFunctionReturn(0);
3139 }
3140 EXTERN_C_END
3141 
3142 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3143 EXTERN_C_BEGIN
3144 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3150 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3151 {
3152   PetscErrorCode ierr;
3153   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3154   Mat            B;
3155   Mat_MPIAIJ     *b;
3156 
3157   PetscFunctionBegin;
3158   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3159 
3160   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3161   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3162   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3163   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3164   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3165   b = (Mat_MPIAIJ*) B->data;
3166 
3167   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3168   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3169   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3170   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3171   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3172   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3173   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3174   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3175   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3176   if (reuse == MAT_REUSE_MATRIX) {
3177     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3178   } else {
3179    *newmat = B;
3180   }
3181   PetscFunctionReturn(0);
3182 }
3183 EXTERN_C_END
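
/*
   The conversion above is normally invoked through MatConvert().  A minimal usage sketch (assumes
   A is an assembled MPIBAIJ matrix):

      Mat Aaij;
      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);CHKERRQ(ierr);
*/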
3184 
3185 EXTERN_C_BEGIN
3186 #if defined(PETSC_HAVE_MUMPS)
3187 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3188 #endif
3189 EXTERN_C_END
3190 
3191 /*MC
3192    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3193 
3194    Options Database Keys:
3195 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3196 . -mat_block_size <bs> - set the blocksize used to store the matrix
3197 - -mat_use_hash_table <fact> - use hash tables during matrix assembly (fact is the hash table size factor)
3198 
3199   Level: beginner
3200 
3201 .seealso: MatCreateMPIBAIJ
3202 M*/
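
/*
   A minimal sketch of creating a matrix of this type explicitly; the block size, local sizes and
   preallocation values are illustrative only, and m,n are assumed divisible by the block size:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/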
3203 
3204 EXTERN_C_BEGIN
3205 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3206 EXTERN_C_END
3207 
3208 EXTERN_C_BEGIN
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatCreate_MPIBAIJ"
3211 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3212 {
3213   Mat_MPIBAIJ    *b;
3214   PetscErrorCode ierr;
3215   PetscBool      flg;
3216 
3217   PetscFunctionBegin;
3218   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3219   B->data = (void*)b;
3220 
3221   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3222   B->assembled  = PETSC_FALSE;
3223 
3224   B->insertmode = NOT_SET_VALUES;
3225   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3226   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3227 
3228   /* build local table of row and column ownerships */
3229   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3230 
3231   /* build cache for off array entries formed */
3232   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3233   b->donotstash  = PETSC_FALSE;
3234   b->colmap      = PETSC_NULL;
3235   b->garray      = PETSC_NULL;
3236   b->roworiented = PETSC_TRUE;
3237 
3238   /* stuff used in block assembly */
3239   b->barray       = 0;
3240 
3241   /* stuff used for matrix vector multiply */
3242   b->lvec         = 0;
3243   b->Mvctx        = 0;
3244 
3245   /* stuff for MatGetRow() */
3246   b->rowindices   = 0;
3247   b->rowvalues    = 0;
3248   b->getrowactive = PETSC_FALSE;
3249 
3250   /* hash table stuff */
3251   b->ht           = 0;
3252   b->hd           = 0;
3253   b->ht_size      = 0;
3254   b->ht_flag      = PETSC_FALSE;
3255   b->ht_fact      = 0;
3256   b->ht_total_ct  = 0;
3257   b->ht_insert_ct = 0;
3258 
3259   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3260   b->ijonly       = PETSC_FALSE;
3261 
3262   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3263     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3264     if (flg) {
3265       PetscReal fact = 1.39;
3266       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3267       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3268       if (fact <= 1.0) fact = 1.39;
3269       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3270       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3271     }
3272   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3273 
3274 #if defined(PETSC_HAVE_MUMPS)
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3276 #endif
3277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3278                                      "MatConvert_MPIBAIJ_MPIAdj",
3279                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3281                                      "MatConvert_MPIBAIJ_MPIAIJ",
3282                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3284                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3285                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3287                                      "MatStoreValues_MPIBAIJ",
3288                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3290                                      "MatRetrieveValues_MPIBAIJ",
3291                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3293                                      "MatGetDiagonalBlock_MPIBAIJ",
3294                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3296                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3297                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3299 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3300 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3302                                      "MatDiagonalScaleLocal_MPIBAIJ",
3303                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3305                                      "MatSetHashTableFactor_MPIBAIJ",
3306                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3308                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3309                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3310   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3311   PetscFunctionReturn(0);
3312 }
3313 EXTERN_C_END
3314 
3315 /*MC
3316    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3317 
3318    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3319    and MATMPIBAIJ otherwise.
3320 
3321    Options Database Keys:
3322 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3323 
3324   Level: beginner
3325 
3326 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3327 M*/
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3331 /*@C
3332    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3333    (block compressed row).  For good matrix assembly performance
3334    the user should preallocate the matrix storage by setting the parameters
3335    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3336    performance can be increased by more than a factor of 50.
3337 
3338    Collective on Mat
3339 
3340    Input Parameters:
3341 +  A - the matrix
3342 .  bs   - size of block
3343 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3344            submatrix  (same for all local rows)
3345 .  d_nnz - array containing the number of block nonzeros in the various block rows
3346            of the diagonal portion of the local submatrix (possibly different for each block
3347            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3348            set it even if it is zero.
3349 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3350            submatrix (same for all local rows).
3351 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3352            off-diagonal portion of the local submatrix (possibly different for
3353            each block row) or PETSC_NULL.
3354 
3355    If the *_nnz parameter is given then the *_nz parameter is ignored
3356 
3357    Options Database Keys:
3358 +   -mat_block_size - size of the blocks to use
3359 -   -mat_use_hash_table <fact> - use hash tables during matrix assembly (fact is the hash table size factor)
3360 
3361    Notes:
3362    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3363    then it must be used on all processors that share the object for that argument.
3364 
3365    Storage Information:
3366    For a square global matrix we define each processor's diagonal portion
3367    to be its local rows and the corresponding columns (a square submatrix);
3368    each processor's off-diagonal portion encompasses the remainder of the
3369    local matrix (a rectangular submatrix).
3370 
3371    The user can specify preallocated storage for the diagonal part of
3372    the local submatrix with either d_nz or d_nnz (not both).  Set
3373    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3374    memory allocation.  Likewise, specify preallocated storage for the
3375    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3376 
3377    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3378    the figure below we depict these three local rows and all columns (0-11).
3379 
3380 .vb
3381            0 1 2 3 4 5 6 7 8 9 10 11
3382           -------------------
3383    row 3  |  o o o d d d o o o o o o
3384    row 4  |  o o o d d d o o o o o o
3385    row 5  |  o o o d d d o o o o o o
3386           -------------------
3387 .ve
3388 
3389    Thus, any entries in the d locations are stored in the d (diagonal)
3390    submatrix, and any entries in the o locations are stored in the
3391    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3392    stored simply in the MATSEQBAIJ format for compressed row storage.
3393 
3394    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3395    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3396    In general, for PDE problems in which most nonzeros are near the diagonal,
3397    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3398    or you will get TERRIBLE performance; see the users' manual chapter on
3399    matrices.
3400 
3401    You can call MatGetInfo() to get information on how effective the preallocation was;
3402    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3403    You can also run with the option -info and look for messages with the string
3404    malloc in them to see if additional memory allocation was needed.
3405 
3406    Level: intermediate
3407 
3408 .keywords: matrix, block, aij, compressed row, sparse, parallel
3409 
3410 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3411 @*/
3412 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3413 {
3414   PetscErrorCode ierr;
3415 
3416   PetscFunctionBegin;
3417   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3418   PetscValidType(B,1);
3419   PetscValidLogicalCollectiveInt(B,bs,2);
3420   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
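
/*
   A minimal preallocation sketch with per-block-row counts; the numbers are illustrative only and
   assume this process owns two block rows of an MPIBAIJ matrix A with block size 2:

      PetscInt d_nnz[2] = {2,1};      block nonzeros expected in the diagonal portion of each local block row
      PetscInt o_nnz[2] = {1,0};      block nonzeros expected in the off-diagonal portion
      ierr = MatMPIBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/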
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatCreateMPIBAIJ"
3426 /*@C
3427    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3428    (block compressed row).  For good matrix assembly performance
3429    the user should preallocate the matrix storage by setting the parameters
3430    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3431    performance can be increased by more than a factor of 50.
3432 
3433    Collective on MPI_Comm
3434 
3435    Input Parameters:
3436 +  comm - MPI communicator
3437 .  bs   - size of block
3438 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3439            This value should be the same as the local size used in creating the
3440            y vector for the matrix-vector product y = Ax.
3441 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3442            This value should be the same as the local size used in creating the
3443            x vector for the matrix-vector product y = Ax.
3444 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3445 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3446 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3447            submatrix  (same for all local rows)
3448 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3449            of the diagonal portion of the local submatrix (possibly different for each block
3450            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3451            and set it even if it is zero.
3452 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3453            submatrix (same for all local rows).
3454 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3455            off-diagonal portion of the local submatrix (possibly different for
3456            each block row) or PETSC_NULL.
3457 
3458    Output Parameter:
3459 .  A - the matrix
3460 
3461    Options Database Keys:
3462 +   -mat_block_size - size of the blocks to use
3463 -   -mat_use_hash_table <fact> - use hash tables during matrix assembly (fact is the hash table size factor)
3464 
3465    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3466    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3467    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3468 
3469    Notes:
3470    If the *_nnz parameter is given then the *_nz parameter is ignored
3471 
3472    A nonzero block is any block that has 1 or more nonzeros in it
3473 
3474    The user MUST specify either the local or global matrix dimensions
3475    (possibly both).
3476 
3477    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3478    then it must be used on all processors that share the object for that argument.
3479 
3480    Storage Information:
3481    For a square global matrix we define each processor's diagonal portion
3482    to be its local rows and the corresponding columns (a square submatrix);
3483    each processor's off-diagonal portion encompasses the remainder of the
3484    local matrix (a rectangular submatrix).
3485 
3486    The user can specify preallocated storage for the diagonal part of
3487    the local submatrix with either d_nz or d_nnz (not both).  Set
3488    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3489    memory allocation.  Likewise, specify preallocated storage for the
3490    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3491 
3492    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3493    the figure below we depict these three local rows and all columns (0-11).
3494 
3495 .vb
3496            0 1 2 3 4 5 6 7 8 9 10 11
3497           -------------------
3498    row 3  |  o o o d d d o o o o o o
3499    row 4  |  o o o d d d o o o o o o
3500    row 5  |  o o o d d d o o o o o o
3501           -------------------
3502 .ve
3503 
3504    Thus, any entries in the d locations are stored in the d (diagonal)
3505    submatrix, and any entries in the o locations are stored in the
3506    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3507    stored simply in the MATSEQBAIJ format for compressed row storage.
3508 
3509    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3510    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3511    In general, for PDE problems in which most nonzeros are near the diagonal,
3512    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3513    or you will get TERRIBLE performance; see the users' manual chapter on
3514    matrices.
3515 
3516    Level: intermediate
3517 
3518 .keywords: matrix, block, aij, compressed row, sparse, parallel
3519 
3520 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3521 @*/
3522 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3523 {
3524   PetscErrorCode ierr;
3525   PetscMPIInt    size;
3526 
3527   PetscFunctionBegin;
3528   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3529   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3531   if (size > 1) {
3532     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3533     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3534   } else {
3535     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3536     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3537   }
3538   PetscFunctionReturn(0);
3539 }
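
/*
   A minimal usage sketch for MatCreateMPIBAIJ() followed by blocked insertion; the sizes and
   values are illustrative only, row and col are global block indices, and vals holds one bs*bs
   block in row-oriented order:

      Mat         A;
      PetscInt    row = 0, col = 0;
      PetscScalar vals[4] = {1.0,0.0,0.0,1.0};
      ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/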
3540 
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3543 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3544 {
3545   Mat            mat;
3546   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3547   PetscErrorCode ierr;
3548   PetscInt       len=0;
3549 
3550   PetscFunctionBegin;
3551   *newmat       = 0;
3552   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3553   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3554   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3555   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3556 
3557   mat->factortype   = matin->factortype;
3558   mat->preallocated = PETSC_TRUE;
3559   mat->assembled    = PETSC_TRUE;
3560   mat->insertmode   = NOT_SET_VALUES;
3561 
3562   a      = (Mat_MPIBAIJ*)mat->data;
3563   mat->rmap->bs  = matin->rmap->bs;
3564   a->bs2   = oldmat->bs2;
3565   a->mbs   = oldmat->mbs;
3566   a->nbs   = oldmat->nbs;
3567   a->Mbs   = oldmat->Mbs;
3568   a->Nbs   = oldmat->Nbs;
3569 
3570   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3571   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3572 
3573   a->size         = oldmat->size;
3574   a->rank         = oldmat->rank;
3575   a->donotstash   = oldmat->donotstash;
3576   a->roworiented  = oldmat->roworiented;
3577   a->rowindices   = 0;
3578   a->rowvalues    = 0;
3579   a->getrowactive = PETSC_FALSE;
3580   a->barray       = 0;
3581   a->rstartbs     = oldmat->rstartbs;
3582   a->rendbs       = oldmat->rendbs;
3583   a->cstartbs     = oldmat->cstartbs;
3584   a->cendbs       = oldmat->cendbs;
3585 
3586   /* hash table stuff */
3587   a->ht           = 0;
3588   a->hd           = 0;
3589   a->ht_size      = 0;
3590   a->ht_flag      = oldmat->ht_flag;
3591   a->ht_fact      = oldmat->ht_fact;
3592   a->ht_total_ct  = 0;
3593   a->ht_insert_ct = 0;
3594 
3595   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3596   if (oldmat->colmap) {
3597 #if defined (PETSC_USE_CTABLE)
3598   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3599 #else
3600   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3601   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3602   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3603 #endif
3604   } else a->colmap = 0;
3605 
3606   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3607     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3608     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3609     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3610   } else a->garray = 0;
3611 
3612   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3613   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3614   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3615   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3616   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3617 
3618   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3619   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3620   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3621   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3622   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3623   *newmat = mat;
3624 
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 #undef __FUNCT__
3629 #define __FUNCT__ "MatLoad_MPIBAIJ"
3630 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3631 {
3632   PetscErrorCode ierr;
3633   int            fd;
3634   PetscInt       i,nz,j,rstart,rend;
3635   PetscScalar    *vals,*buf;
3636   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3637   MPI_Status     status;
3638   PetscMPIInt    rank,size,maxnz;
3639   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3640   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3641   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3642   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3643   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3644   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3645 
3646   PetscFunctionBegin;
3647   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3648     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3649   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3650 
3651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3652   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3653   if (!rank) {
3654     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3655     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3656     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3657   }
3658 
3659   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3660 
3661   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3662   M = header[1]; N = header[2];
3663 
3664   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3665   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3666   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3667 
3668   /* If global sizes are set, check if they are consistent with that given in the file */
3669   if (sizesset) {
3670     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3671   }
3672   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3673   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3674 
3675   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3676 
3677   /*
3678      This code adds extra rows to make sure the number of rows is
3679      divisible by the blocksize
3680   */
3681   Mbs        = M/bs;
3682   extra_rows = bs - M + bs*Mbs;
3683   if (extra_rows == bs) extra_rows = 0;
3684   else                  Mbs++;
3685   if (extra_rows && !rank) {
3686     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3687   }
3688 
3689   /* determine ownership of all rows */
3690   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3691     mbs        = Mbs/size + ((Mbs % size) > rank);
3692     m          = mbs*bs;
3693   } else { /* User set */
3694     m          = newmat->rmap->n;
3695     mbs        = m/bs;
3696   }
3697   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3698   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3699 
3700   /* process 0 needs enough room for process with most rows */
3701   if (!rank) {
3702     mmax = rowners[1];
3703     for (i=2; i<size; i++) {
3704       mmax = PetscMax(mmax,rowners[i]);
3705     }
3706     mmax*=bs;
3707   } else mmax = m;
3708 
3709   rowners[0] = 0;
3710   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3711   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3712   rstart = rowners[rank];
3713   rend   = rowners[rank+1];
3714 
3715   /* distribute row lengths to all processors */
3716   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3717   if (!rank) {
3718     mend = m;
3719     if (size == 1) mend = mend - extra_rows;
3720     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3721     for (j=mend; j<m; j++) locrowlens[j] = 1;
3722     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3723     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3724     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3725     for (j=0; j<m; j++) {
3726       procsnz[0] += locrowlens[j];
3727     }
3728     for (i=1; i<size; i++) {
3729       mend = browners[i+1] - browners[i];
3730       if (i == size-1) mend = mend - extra_rows;
3731       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3732       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3733       /* calculate the number of nonzeros on each processor */
3734       for (j=0; j<browners[i+1]-browners[i]; j++) {
3735         procsnz[i] += rowlengths[j];
3736       }
3737       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3738     }
3739     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3740   } else {
3741     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3742   }
3743 
3744   if (!rank) {
3745     /* determine max buffer needed and allocate it */
3746     maxnz = procsnz[0];
3747     for (i=1; i<size; i++) {
3748       maxnz = PetscMax(maxnz,procsnz[i]);
3749     }
3750     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3751 
3752     /* read in my part of the matrix column indices  */
3753     nz     = procsnz[0];
3754     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3755     mycols = ibuf;
3756     if (size == 1)  nz -= extra_rows;
3757     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3758     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3759 
3760     /* read in each other process's column indices (except the last) and ship them off */
3761     for (i=1; i<size-1; i++) {
3762       nz   = procsnz[i];
3763       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3764       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3765     }
3766     /* read in the stuff for the last proc */
3767     if (size != 1) {
3768       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3769       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3770       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3771       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3772     }
3773     ierr = PetscFree(cols);CHKERRQ(ierr);
3774   } else {
3775     /* determine buffer space needed for message */
3776     nz = 0;
3777     for (i=0; i<m; i++) {
3778       nz += locrowlens[i];
3779     }
3780     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3781     mycols = ibuf;
3782     /* receive message of column indices */
3783     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3784     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3785     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3786   }
3787 
3788   /* loop over local rows, determining number of off diagonal entries */
3789   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3790   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3791   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3792   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3793   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3794   rowcount = 0; nzcount = 0;
3795   for (i=0; i<mbs; i++) {
3796     dcount  = 0;
3797     odcount = 0;
3798     for (j=0; j<bs; j++) {
3799       kmax = locrowlens[rowcount];
3800       for (k=0; k<kmax; k++) {
3801         tmp = mycols[nzcount++]/bs;
3802         if (!mask[tmp]) {
3803           mask[tmp] = 1;
3804           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3805           else masked1[dcount++] = tmp;
3806         }
3807       }
3808       rowcount++;
3809     }
3810 
3811     dlens[i]  = dcount;
3812     odlens[i] = odcount;
3813 
3814     /* zero out the mask elements we set */
3815     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3816     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3817   }
3818 
3819 
3820   if (!sizesset) {
3821     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3822   }
3823   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3824 
3825   if (!rank) {
3826     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3827     /* read in my part of the matrix numerical values  */
3828     nz = procsnz[0];
3829     vals = buf;
3830     mycols = ibuf;
3831     if (size == 1)  nz -= extra_rows;
3832     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3833     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3834 
3835     /* insert into matrix */
3836     jj      = rstart*bs;
3837     for (i=0; i<m; i++) {
3838       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3839       mycols += locrowlens[i];
3840       vals   += locrowlens[i];
3841       jj++;
3842     }
3843     /* read in other processors (except the last one) and ship out */
3844     for (i=1; i<size-1; i++) {
3845       nz   = procsnz[i];
3846       vals = buf;
3847       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3848       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3849     }
3850     /* the last proc */
3851     if (size != 1){
3852       nz   = procsnz[i] - extra_rows;
3853       vals = buf;
3854       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3855       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3856       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3857     }
3858     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3859   } else {
3860     /* receive numeric values */
3861     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3862 
3863     /* receive message of values */
3864     vals   = buf;
3865     mycols = ibuf;
3866     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3867     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3868     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3869 
3870     /* insert into matrix */
3871     jj      = rstart*bs;
3872     for (i=0; i<m; i++) {
3873       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3874       mycols += locrowlens[i];
3875       vals   += locrowlens[i];
3876       jj++;
3877     }
3878   }
3879   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3880   ierr = PetscFree(buf);CHKERRQ(ierr);
3881   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3882   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3883   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3884   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3885   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3887 
3888   PetscFunctionReturn(0);
3889 }
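
/*
   A minimal sketch of loading an MPIBAIJ matrix from a PETSc binary file; the file name is
   illustrative, and the block size can be selected at run time with -matload_block_size <bs>:

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/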
3890 
3891 #undef __FUNCT__
3892 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3893 /*@
3894    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3895 
3896    Input Parameters:
3897 +  mat  - the matrix
3898 -  fact - factor
3899 
3900    Not Collective, each process can use a different factor
3901 
3902    Level: advanced
3903 
3904   Notes:
3905    This can also be set by the command line option: -mat_use_hash_table <fact>
3906 
3907 .keywords: matrix, hashtable, factor, HT
3908 
3909 .seealso: MatSetOption()
3910 @*/
3911 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3912 {
3913   PetscErrorCode ierr;
3914 
3915   PetscFunctionBegin;
3916   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3917   PetscFunctionReturn(0);
3918 }
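
/*
   A minimal usage sketch; the factor value is illustrative, and the hash table option is assumed
   to be enabled before values are inserted into the matrix A:

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/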
3919 
3920 EXTERN_C_BEGIN
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3923 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3924 {
3925   Mat_MPIBAIJ *baij;
3926 
3927   PetscFunctionBegin;
3928   baij = (Mat_MPIBAIJ*)mat->data;
3929   baij->ht_fact = fact;
3930   PetscFunctionReturn(0);
3931 }
3932 EXTERN_C_END
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3936 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3937 {
3938   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3939   PetscFunctionBegin;
3940   *Ad     = a->A;
3941   *Ao     = a->B;
3942   *colmap = a->garray;
3943   PetscFunctionReturn(0);
3944 }
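
/*
   A minimal usage sketch; Ad is the local diagonal block, Ao the local off-diagonal block, and
   colmap maps Ao's compressed block columns to global block columns (A is assumed assembled):

      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/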
3945 
3946 /*
3947     Special version for direct calls from Fortran (to eliminate two function call overheads
3948 */
3949 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3950 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3951 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3952 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3953 #endif
3954 
3955 #undef __FUNCT__
3956 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3957 /*@C
3958   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3959 
3960   Collective on Mat
3961 
3962   Input Parameters:
3963 + mat - the matrix
3964 . min - number of input rows
3965 . im - input rows
3966 . nin - number of input columns
3967 . in - input columns
3968 . v - numerical values input
3969 - addvin - INSERT_VALUES or ADD_VALUES
3970 
3971   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
3972 
3973   Level: advanced
3974 
3975 .seealso:   MatSetValuesBlocked()
3976 @*/
3977 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3978 {
3979   /* convert input arguments to C version */
3980   Mat             mat = *matin;
3981   PetscInt        m = *min, n = *nin;
3982   InsertMode      addv = *addvin;
3983 
3984   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3985   const MatScalar *value;
3986   MatScalar       *barray=baij->barray;
3987   PetscBool       roworiented = baij->roworiented;
3988   PetscErrorCode  ierr;
3989   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3990   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3991   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3992 
3993   PetscFunctionBegin;
3994   /* tasks normally handled by MatSetValuesBlocked() */
3995   if (mat->insertmode == NOT_SET_VALUES) {
3996     mat->insertmode = addv;
3997   }
3998 #if defined(PETSC_USE_DEBUG)
3999   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4000   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4001 #endif
4002   if (mat->assembled) {
4003     mat->was_assembled = PETSC_TRUE;
4004     mat->assembled     = PETSC_FALSE;
4005   }
4006   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4007 
4008 
4009   if(!barray) {
4010     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4011     baij->barray = barray;
4012   }
4013 
4014   if (roworiented) {
4015     stepval = (n-1)*bs;
4016   } else {
4017     stepval = (m-1)*bs;
4018   }
4019   for (i=0; i<m; i++) {
4020     if (im[i] < 0) continue;
4021 #if defined(PETSC_USE_DEBUG)
4022     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4023 #endif
4024     if (im[i] >= rstart && im[i] < rend) {
4025       row = im[i] - rstart;
4026       for (j=0; j<n; j++) {
4027         /* If there is a single block column (row oriented) or a single block row (column oriented), no copy is required */
4028         if ((roworiented) && (n == 1)) {
4029           barray = (MatScalar*)v + i*bs2;
4030         } else if((!roworiented) && (m == 1)) {
4031           barray = (MatScalar*)v + j*bs2;
4032         } else { /* Here a copy is required */
4033           if (roworiented) {
4034             value = v + i*(stepval+bs)*bs + j*bs;
4035           } else {
4036             value = v + j*(stepval+bs)*bs + i*bs;
4037           }
4038           for (ii=0; ii<bs; ii++,value+=stepval) {
4039             for (jj=0; jj<bs; jj++) {
4040               *barray++  = *value++;
4041             }
4042           }
4043           barray -= bs2;
4044         }
4045 
4046         if (in[j] >= cstart && in[j] < cend){
4047           col  = in[j] - cstart;
4048           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4049         }
4050         else if (in[j] < 0) continue;
4051 #if defined(PETSC_USE_DEBUG)
4052         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4053 #endif
4054         else {
4055           if (mat->was_assembled) {
4056             if (!baij->colmap) {
4057               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4058             }
4059 
4060 #if defined(PETSC_USE_DEBUG)
4061 #if defined (PETSC_USE_CTABLE)
4062             { PetscInt data;
4063               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4064               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4065             }
4066 #else
4067             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4068 #endif
4069 #endif
4070 #if defined (PETSC_USE_CTABLE)
4071             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4072             col  = (col - 1)/bs;
4073 #else
4074             col = (baij->colmap[in[j]] - 1)/bs;
4075 #endif
4076             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4077               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4078               col =  in[j];
4079             }
4080           }
4081           else col = in[j];
4082           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4083         }
4084       }
4085     } else {
4086       if (!baij->donotstash) {
4087         if (roworiented) {
4088           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4089         } else {
4090           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4091         }
4092       }
4093     }
4094   }
4095 
4096   /* task normally handled by MatSetValuesBlocked() */
4097   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4098   PetscFunctionReturn(0);
4099 }
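/*
   Illustrative note (not in the original source): the routine above is a Fortran-callable copy of
   the standard blocked insertion path.  From C the same operation is normally written with
   MatSetValuesBlocked(); a minimal, hedged sketch inserting one bs x bs block (bs = 2, row
   oriented) at block row 0, block column 0 of an already preallocated MATMPIBAIJ matrix:

     Mat            mat;
     PetscInt       row = 0,col = 0;
     PetscScalar    vals[4] = {1.0,2.0,3.0,4.0};
     PetscErrorCode ierr;

     ierr = MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/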
4100 
4101 #undef __FUNCT__
4102 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4103 /*@
4104      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4105          in standard CSR format.
4106 
4107    Collective on MPI_Comm
4108 
4109    Input Parameters:
4110 +  comm - MPI communicator
4111 .  bs - the block size, only a block size of 1 is supported
4112 .  m - number of local rows (Cannot be PETSC_DECIDE)
4113 .  n - This value should be the same as the local size used in creating the
4114        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4115        calculated if N is given). For square matrices n is almost always m.
4116 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4117 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4118 .   i - row offsets in standard CSR form (i[0] must be 0)
4119 .   j - column indices
4120 -   a - matrix values
4121 
4122    Output Parameter:
4123 .   mat - the matrix
4124 
4125    Level: intermediate
4126 
4127    Notes:
4128        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4129      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4130      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4131 
4132        The i and j indices are 0 based, and the i array holds offsets into the local j array (that is, CSR row pointers for the local rows); a small usage sketch follows the routine.
4133 
4134 .keywords: matrix, baij, block, compressed row, sparse, parallel
4135 
4136 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4137           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4138 @*/
4139 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4140 {
4141   PetscErrorCode ierr;
4142 
4143 
4144   PetscFunctionBegin;
4145   if (i[0]) {
4146     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4147   }
4148   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4149   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4150   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4151   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4152   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4153   PetscFunctionReturn(0);
4154 }
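/*
   Illustrative note (not in the original source): a minimal, hedged usage sketch for
   MatCreateMPIBAIJWithArrays() above, assuming two MPI processes that each own two rows of a
   4 x 4 matrix with bs = 1.  The i array holds the CSR row offsets for the local rows, j the
   global column indices, and a the values; all three arrays are copied by the routine.

     Mat            mat;
     PetscInt       i[3],j[4];
     PetscScalar    a[4];
     PetscMPIInt    rank;
     PetscErrorCode ierr;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {
       i[0] = 0; i[1] = 2; i[2] = 4;
       j[0] = 0; j[1] = 1; j[2] = 1; j[3] = 2;
       a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 4.0;
     } else {
       i[0] = 0; i[1] = 2; i[2] = 4;
       j[0] = 1; j[1] = 2; j[2] = 2; j[3] = 3;
       a[0] = 5.0; a[1] = 6.0; a[2] = 7.0; a[3] = 8.0;
     }
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
*/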
4155