/* /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3192c3e1bf96856519ae22ffd756f12fe2bc5489) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
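/*
   Usage sketch (illustrative, not code from this file): MatGetRowMaxAbs()
   dispatches here for MATMPIBAIJ; v and idx receive, per local row, the
   largest entry in magnitude and its global column.
.vb
   Vec      v;
   PetscInt *idx;                                             /* hypothetical names */
   ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);          /* row-sized vector  */
   ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr); /* m = local rows    */
   ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
.ve
*/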

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the number of block columns in the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
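/*
   Note: colmap stores local offsets shifted by one so that zero can mean
   "global block column not present in the off-diagonal part". A lookup sketch
   (illustrative; gcol is a hypothetical global point column):
.vb
   col = baij->colmap[gcol/bs] - 1;           /* -1 undoes the +1 shift        */
   if (col < 0) { /* column not local */ }    /* 0 meant "absent"              */
   else col += gcol % bs;                     /* point column within the block */
.ve
*/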

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
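/*
   Both macros locate the block column with the same strategy: a bisection loop
   narrows [low,high) until at most 3 candidates remain, then a short linear
   scan finishes the search; new blocks are inserted in sorted order. A
   standalone sketch of the idea (illustrative only, not code used here):
.vb
   low = 0; high = nrow;
   while (high-low > 3) {              /* bisect on the block column index */
     t = (low+high)/2;
     if (rp[t] > bcol) high = t;
     else              low  = t;
   }
   for (_i=low; _i<high; _i++) {       /* finish with a linear scan */
     if (rp[_i] == bcol) { /* found: update the bs x bs block */ break; }
     if (rp[_i] >  bcol) { /* not found: insert before _i     */ break; }
   }
.ve
*/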

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
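/*
   Usage sketch (illustrative, with hypothetical index variables): callers pass
   global point (not block) indices; locally owned entries go straight into A
   or B through the macros above, everything else is stashed for assembly.
.vb
   PetscInt    rows[1] = {grow},cols[2] = {gcol,gcol+1};
   PetscScalar vals[2] = {1.0,2.0};
   ierr = MatSetValues(mat,1,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
*/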

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), v already holds the block contiguously and a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
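/*
   Usage sketch (illustrative): MatSetValuesBlocked() takes *block* indices and
   bs*bs values per block, so one call below inserts a full block at block row
   brow and block column bcol (hypothetical names) of a matrix with block
   size bs.
.vb
   PetscScalar vals[bs*bs];    /* one block; layout follows the row-oriented flag */
   ...                         /* fill vals */
   ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
.ve
*/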

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
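/*
   HASH() is multiplicative hashing with the golden-ratio constant
   (HASH_KEY = (sqrt(5)-1)/2): the fractional part of key*HASH_KEY is spread
   nearly uniformly in [0,1), and scaling by the table size gives a slot index.
   For example, key = 100 and size = 11 give tmp = 61.8033..., fractional part
   0.8033..., hence slot (PetscInt)(11*0.8033...) = 8.
*/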
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
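/*
   Note: as the error message above indicates, MatGetValues() on a MATMPIBAIJ
   matrix can only retrieve rows owned by the calling process. A sketch
   (illustrative; grow, gcol, and val are hypothetical variables):
.vb
   ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
   if (grow >= rstart && grow < rend) {
     ierr = MatGetValues(mat,1,&grow,1,&gcol,&val);CHKERRQ(ierr);
   }
.ve
*/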

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
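/*
   The Frobenius branch above computes sqrt(sum_{i,j} |a_{ij}|^2): each process
   sums |.|^2 over the entries of its diagonal (A) and off-diagonal (B) blocks,
   the partial sums are combined with MPI_Allreduce, and the square root is
   taken last so it is applied exactly once to the global sum.
*/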

/*
  Creates and populates the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
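/*
   The table uses open addressing with linear probing: a key is placed at the
   first empty slot at or after HASH(ht_size,key,tmp), wrapping modulo ht_size,
   and lookups in MatSetValues_MPIBAIJ_HT() probe the same sequence. Sizing the
   table as factor*nz keeps the load factor at 1/factor, so probe chains stay
   short for factor comfortably above 1.
*/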

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in the same INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
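/*
   Assembly is always a begin/end pair so communication of stashed off-process
   entries can overlap with other work:
.vb
   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ...  /* work that does not touch mat */
   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
*/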

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
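/*
   Once the hash table path is enabled (ht_flag, typically set with the runtime
   option -mat_use_hash_table), the function-pointer swap above routes all
   later MatSetValues()/MatSetValuesBlocked() calls through the *_HT variants,
   which assume the nonzero pattern is fixed.
*/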

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute the number of nonzero blocks in each block row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Indices are written for every point row, not just
     once per block row, since process 0 does not have the information needed to expand
     a block row into its point rows. This costs extra communication, but still no more
     than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
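/*
   The resulting binary file therefore contains, in order: a 4-entry header
   (MAT_FILE_CLASSID, M, N, global nz), the point-row lengths, the column
   indices for every point row, and finally the numerical values, with each
   block row expanded into its point rows so a reader needs no knowledge of bs.
*/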

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatMult_MPIBAIJ"
1319 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1320 {
1321   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323   PetscInt       nt;
1324 
1325   PetscFunctionBegin;
1326   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1327   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1328   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* start communicating the ghost values of xx */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);                                     /* diagonal block, overlapped with the communication */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);                          /* off-diagonal block applied to the ghost values */
1334   PetscFunctionReturn(0);
1335 }
1336 
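/*
   Illustrative usage sketch (not part of the library): computing y = A*x for an
   assembled MATMPIBAIJ matrix A. MatGetVecs() gives x and y layouts compatible
   with A, satisfying the size checks above.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(x);CHKERRQ(ierr);
     ierr = VecDestroy(y);CHKERRQ(ierr);
*/
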
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1339 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1340 {
1341   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1347   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1348   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1354 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1355 {
1356   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1357   PetscErrorCode ierr;
1358   PetscBool      merged;
1359 
1360   PetscFunctionBegin;
1361   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1362   /* do nondiagonal part */
1363   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1364   if (!merged) {
1365     /* send it on its way */
1366     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* receive remote parts: note this assumes the values are not actually */
1370     /* inserted in yy until the next line */
1371     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1372   } else {
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* send it on its way */
1376     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we must still call the End() as a no-op */
1378     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1385 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1386 {
1387   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   /* do nondiagonal part */
1392   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1393   /* send it on its way */
1394   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1395   /* do local part */
1396   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted into zz until the VecScatterEnd() below; this is true for  */
  /* the current implementation but may not hold for every scatter.     */
1400   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 /*
1405   This only works correctly for square matrices where the subblock A->A is the
1406    diagonal block
1407 */
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1410 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1417   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatScale_MPIBAIJ"
1423 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1424 {
1425   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1430   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1436 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1437 {
1438   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1439   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1440   PetscErrorCode ierr;
1441   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1442   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1443   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1444 
1445   PetscFunctionBegin;
1446   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1447   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1448   mat->getrowactive = PETSC_TRUE;
1449 
1450   if (!mat->rowvalues && (idx || v)) {
1451     /*
1452         allocate enough space to hold information from the longest row.
1453     */
1454     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1455     PetscInt     max = 1,mbs = mat->mbs,tmp;
1456     for (i=0; i<mbs; i++) {
1457       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1458       if (max < tmp) { max = tmp; }
1459     }
1460     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1461   }
1462   lrow = row - brstart;
1463 
1464   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1465   if (!v)   {pvA = 0; pvB = 0;}
1466   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1467   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1468   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1469   nztot = nzA + nzB;
1470 
1471   cmap  = mat->garray;
1472   if (v  || idx) {
1473     if (nztot) {
1474       /* Sort by increasing column numbers, assuming A and B already sorted */
1475       PetscInt imark = -1;
1476       if (v) {
1477         *v = v_p = mat->rowvalues;
1478         for (i=0; i<nzB; i++) {
1479           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1480           else break;
1481         }
1482         imark = i;
1483         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1484         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1485       }
1486       if (idx) {
1487         *idx = idx_p = mat->rowindices;
1488         if (imark > -1) {
1489           for (i=0; i<imark; i++) {
1490             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1491           }
1492         } else {
1493           for (i=0; i<nzB; i++) {
1494             if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1496             else break;
1497           }
1498           imark = i;
1499         }
1500         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1502       }
1503     } else {
1504       if (idx) *idx = 0;
1505       if (v)   *v   = 0;
1506     }
1507   }
1508   *nz = nztot;
1509   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1510   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
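/*
   Illustrative usage sketch (not part of the library): MatGetRow()/MatRestoreRow()
   may only be called for locally owned rows, and only one row may be active at a
   time, as enforced by the getrowactive flag above. A is an assumed assembled matrix.

     PetscInt          row,rstart,rend,nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... examine the nz column indices and values of this row ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/
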
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1516 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1517 {
1518   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1519 
1520   PetscFunctionBegin;
1521   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1522   baij->getrowactive = PETSC_FALSE;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1528 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1529 {
1530   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1535   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1541 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1542 {
1543   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1544   Mat            A = a->A,B = a->B;
1545   PetscErrorCode ierr;
1546   PetscReal      isend[5],irecv[5];
1547 
1548   PetscFunctionBegin;
1549   info->block_size     = (PetscReal)matin->rmap->bs;
1550   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1551   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1552   isend[3] = info->memory;  isend[4] = info->mallocs;
1553   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1554   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1555   isend[3] += info->memory;  isend[4] += info->mallocs;
1556   if (flag == MAT_LOCAL) {
1557     info->nz_used      = isend[0];
1558     info->nz_allocated = isend[1];
1559     info->nz_unneeded  = isend[2];
1560     info->memory       = isend[3];
1561     info->mallocs      = isend[4];
1562   } else if (flag == MAT_GLOBAL_MAX) {
1563     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else if (flag == MAT_GLOBAL_SUM) {
1570     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else {
1577     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1578   }
1579   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1580   info->fill_ratio_needed = 0;
1581   info->factor_mallocs    = 0;
1582   PetscFunctionReturn(0);
1583 }
1584 
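/*
   Illustrative usage sketch (not part of the library): querying global storage
   statistics of a matrix A. MAT_LOCAL, MAT_GLOBAL_MAX and MAT_GLOBAL_SUM select
   how the per-process numbers are combined, as implemented above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
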
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1587 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1588 {
1589   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   switch (op) {
1594   case MAT_NEW_NONZERO_LOCATIONS:
1595   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1596   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1597   case MAT_KEEP_NONZERO_PATTERN:
1598   case MAT_NEW_NONZERO_LOCATION_ERR:
1599     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1600     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1601     break;
1602   case MAT_ROW_ORIENTED:
1603     a->roworiented = flg;
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_NEW_DIAGONALS:
1608     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1609     break;
1610   case MAT_IGNORE_OFF_PROC_ENTRIES:
1611     a->donotstash = flg;
1612     break;
1613   case MAT_USE_HASH_TABLE:
1614     a->ht_flag = flg;
1615     break;
1616   case MAT_SYMMETRIC:
1617   case MAT_STRUCTURALLY_SYMMETRIC:
1618   case MAT_HERMITIAN:
1619   case MAT_SYMMETRY_ETERNAL:
1620     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1621     break;
1622   default:
1623     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1633   Mat_SeqBAIJ    *Aloc;
1634   Mat            B;
1635   PetscErrorCode ierr;
1636   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1637   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1638   MatScalar      *a;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1643     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1644     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1645     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1646     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1647   } else {
1648     B = *matout;
1649   }
1650 
1651   /* copy over the A part */
1652   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1653   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1654   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1655 
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = (baij->cstartbs+aj[j])*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663         col++; a += bs;
1664       }
1665     }
1666   }
1667   /* copy over the B part */
1668   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1669   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1670   for (i=0; i<mbs; i++) {
1671     rvals[0] = bs*(baij->rstartbs + i);
1672     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1673     for (j=ai[i]; j<ai[i+1]; j++) {
1674       col = baij->garray[aj[j]]*bs;
1675       for (k=0; k<bs; k++) {
1676         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1677         col++; a += bs;
1678       }
1679     }
1680   }
1681   ierr = PetscFree(rvals);CHKERRQ(ierr);
1682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684 
1685   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1686     *matout = B;
1687   } else {
1688     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1689   }
1690   PetscFunctionReturn(0);
1691 }
1692 
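/*
   Illustrative usage sketch (not part of the library): forming the transpose of
   an assembled matrix A. MAT_INITIAL_MATRIX creates a new matrix; in-place
   transposition (MAT_REUSE_MATRIX with &A) is only supported for square
   matrices, per the check above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ... use At ...
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/
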
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1695 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1696 {
1697   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1698   Mat            a = baij->A,b = baij->B;
1699   PetscErrorCode ierr;
1700   PetscInt       s1,s2,s3;
1701 
1702   PetscFunctionBegin;
1703   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1704   if (rr) {
1705     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1706     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1707     /* Overlap communication with computation. */
1708     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1709   }
1710   if (ll) {
1711     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1712     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1713     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1714   }
  /* scale the diagonal block */
1716   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1717 
1718   if (rr) {
1719     /* Do a scatter end and then right scale the off-diagonal block */
1720     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1721     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1722   }
1723 
1724   PetscFunctionReturn(0);
1725 }
1726 
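/*
   Illustrative usage sketch (not part of the library): scaling A in place to
   diag(ll)*A*diag(rr). Here ll is assumed to be distributed like the rows of A
   and rr like its columns; pass PETSC_NULL for a side that should stay unscaled.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/
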
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1730 {
1731   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1732   PetscErrorCode    ierr;
1733   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1734   PetscInt          i,*owners = A->rmap->range;
1735   PetscInt          *nprocs,j,idx,nsends,row;
1736   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1737   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1738   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1739   MPI_Comm          comm = ((PetscObject)A)->comm;
1740   MPI_Request       *send_waits,*recv_waits;
1741   MPI_Status        recv_status,*send_status;
1742   const PetscScalar *xx;
1743   PetscScalar       *bb;
1744 #if defined(PETSC_DEBUG)
1745   PetscBool         found = PETSC_FALSE;
1746 #endif
1747 
1748   PetscFunctionBegin;
1749   /*  first count number of contributors to each processor */
1750   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1751   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1753   j = 0;
1754   for (i=0; i<N; i++) {
1755     if (lastidx > (idx = rows[i])) j = 0;
1756     lastidx = idx;
1757     for (; j<size; j++) {
1758       if (idx >= owners[j] && idx < owners[j+1]) {
1759         nprocs[2*j]++;
1760         nprocs[2*j+1] = 1;
1761         owner[i] = j;
1762 #if defined(PETSC_DEBUG)
1763         found = PETSC_TRUE;
1764 #endif
1765         break;
1766       }
1767     }
1768 #if defined(PETSC_DEBUG)
1769     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1770     found = PETSC_FALSE;
1771 #endif
1772   }
1773   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1774 
1775   if (A->nooffproczerorows) {
1776     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1777     nrecvs = nsends;
1778     nmax   = N;
1779   } else {
1780     /* inform other processors of number of messages and max length*/
1781     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1782   }
1783 
1784   /* post receives:   */
1785   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1786   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1787   for (i=0; i<nrecvs; i++) {
1788     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1789   }
1790 
1791   /* do sends:
1792      1) starts[i] gives the starting index in svalues for stuff going to
1793      the ith processor
1794   */
1795   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1796   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1797   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1798   starts[0]  = 0;
1799   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1800   for (i=0; i<N; i++) {
1801     svalues[starts[owner[i]]++] = rows[i];
1802   }
1803 
1804   starts[0] = 0;
1805   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1806   count = 0;
1807   for (i=0; i<size; i++) {
1808     if (nprocs[2*i+1]) {
1809       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1810     }
1811   }
1812   ierr = PetscFree(starts);CHKERRQ(ierr);
1813 
1814   base = owners[rank];
1815 
1816   /*  wait on receives */
1817   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1818   count  = nrecvs;
1819   slen = 0;
1820   while (count) {
1821     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1822     /* unpack receives into our local space */
1823     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1824     source[imdex]  = recv_status.MPI_SOURCE;
1825     lens[imdex]    = n;
1826     slen          += n;
1827     count--;
1828   }
1829   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1830 
1831   /* move the data into the send scatter */
1832   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1833   count = 0;
1834   for (i=0; i<nrecvs; i++) {
1835     values = rvalues + i*nmax;
1836     for (j=0; j<lens[i]; j++) {
1837       lrows[count++] = values[j] - base;
1838     }
1839   }
1840   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1841   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1842   ierr = PetscFree(owner);CHKERRQ(ierr);
1843   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1844 
1845   /* fix right hand side if needed */
1846   if (x && b) {
1847     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1848     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1849     for (i=0; i<slen; i++) {
1850       bb[lrows[i]] = diag*xx[lrows[i]];
1851     }
1852     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1853     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1854   }
1855 
  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     doing so would allocate new memory for each entry, triggering many
     mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1865   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1866   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1867     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1868   } else if (diag != 0.0) {
1869     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1870     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1871        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1872     for (i=0; i<slen; i++) {
1873       row  = lrows[i] + rstart_bs;
1874       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1875     }
1876     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878   } else {
1879     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1880   }
1881 
1882   ierr = PetscFree(lrows);CHKERRQ(ierr);
1883 
1884   /* wait on sends */
1885   if (nsends) {
1886     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1887     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1888     ierr = PetscFree(send_status);CHKERRQ(ierr);
1889   }
1890   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1891   ierr = PetscFree(svalues);CHKERRQ(ierr);
1892 
1893   PetscFunctionReturn(0);
1894 }
1895 
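/*
   Illustrative usage sketch (not part of the library): zeroing three global rows
   of A (any process may list any rows; the rendezvous above routes them to their
   owners), putting 2.0 on the diagonal of each zeroed row, and fixing the
   right-hand side b so the solution keeps the values of the assumed vector x in
   those rows.

     PetscInt rows[] = {0,5,9};
     ierr = MatZeroRows(A,3,rows,2.0,x,b);CHKERRQ(ierr);
*/
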
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatEqual_MPIBAIJ"
1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1913 {
1914   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1915   Mat            a,b,c,d;
1916   PetscBool      flg;
1917   PetscErrorCode ierr;
1918 
1919   PetscFunctionBegin;
1920   a = matA->A; b = matA->B;
1921   c = matB->A; d = matB->B;
1922 
1923   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1924   if (flg) {
1925     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1926   }
1927   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatCopy_MPIBAIJ"
1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1934 {
1935   PetscErrorCode ierr;
1936   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1937   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1938 
1939   PetscFunctionBegin;
1940   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1941   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1942     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1943   } else {
1944     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1945     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1952 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1964 {
1965   PetscErrorCode ierr;
1966   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1967   PetscBLASInt   bnz,one=1;
1968   Mat_SeqBAIJ    *x,*y;
1969 
1970   PetscFunctionBegin;
1971   if (str == SAME_NONZERO_PATTERN) {
1972     PetscScalar alpha = a;
1973     x = (Mat_SeqBAIJ *)xx->A->data;
1974     y = (Mat_SeqBAIJ *)yy->A->data;
1975     bnz = PetscBLASIntCast(x->nz);
1976     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1977     x = (Mat_SeqBAIJ *)xx->B->data;
1978     y = (Mat_SeqBAIJ *)yy->B->data;
1979     bnz = PetscBLASIntCast(x->nz);
1980     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1981   } else {
1982     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
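/*
   Illustrative usage sketch (not part of the library): Y = Y + 2*X. When both
   matrices share the same nonzero pattern, the routine above reduces to a single
   BLAS axpy on each of the diagonal and off-diagonal blocks.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
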
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1989 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1990 {
1991   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1992   PetscInt rbs,cbs;
1993   PetscErrorCode ierr;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1997   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1998   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1999   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
  if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with existing row block size %d of BAIJ matrix",bs,rbs);
  if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with existing column block size %d of BAIJ matrix",bs,cbs);
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2007 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2008 {
2009   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2010   PetscErrorCode ierr;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2014   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2020 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2021 {
2022   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2023   PetscErrorCode ierr;
2024 
2025   PetscFunctionBegin;
2026   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2027   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2033 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2034 {
2035   PetscErrorCode ierr;
2036   IS             iscol_local;
2037   PetscInt       csize;
2038 
2039   PetscFunctionBegin;
2040   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2041   if (call == MAT_REUSE_MATRIX) {
2042     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2043     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2044   } else {
2045     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2046   }
2047   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2048   if (call == MAT_INITIAL_MATRIX) {
2049     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2050     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
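/*
   Illustrative usage sketch (not part of the library): extracting a parallel
   submatrix. Each process lists the global rows and columns it should own in the
   result; the column IS is gathered onto all processes internally, as above. For
   BAIJ the index sets must respect the block structure; nloc and first are
   assumed block-aligned local sizes/offsets.

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(((PetscObject)A)->comm,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(isrow);CHKERRQ(ierr);
     ierr = ISDestroy(iscol);CHKERRQ(ierr);
*/
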
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqBAIJ
  locally, and then the final result by concatenating the local matrices.
  Writing it directly would look much like MatGetSubMatrices_MPIBAIJ().
*/
2062 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2063 {
2064   PetscErrorCode ierr;
2065   PetscMPIInt    rank,size;
2066   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2067   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2068   Mat            *local,M,Mreuse;
2069   MatScalar      *vwork,*aa;
2070   MPI_Comm       comm = ((PetscObject)mat)->comm;
2071   Mat_SeqBAIJ    *aij;
2072 
2074   PetscFunctionBegin;
2075   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2077 
2078   if (call ==  MAT_REUSE_MATRIX) {
2079     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2080     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2081     local = &Mreuse;
2082     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2083   } else {
2084     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2085     Mreuse = *local;
2086     ierr   = PetscFree(local);CHKERRQ(ierr);
2087   }
2088 
2089   /*
2090       m - number of local rows
2091       n - number of columns (same on all processors)
2092       rstart - first row in new global matrix generated
2093   */
2094   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2095   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2096   m    = m/bs;
2097   n    = n/bs;
2098 
2099   if (call == MAT_INITIAL_MATRIX) {
2100     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2101     ii  = aij->i;
2102     jj  = aij->j;
2103 
2104     /*
2105         Determine the number of non-zeros in the diagonal and off-diagonal
2106         portions of the matrix in order to do correct preallocation
2107     */
2108 
2109     /* first get start and end of "diagonal" columns */
2110     if (csize == PETSC_DECIDE) {
2111       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2112       if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
2114       } else {
2115         nlocal = n/size + ((n % size) > rank);
2116       }
2117     } else {
2118       nlocal = csize/bs;
2119     }
2120     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2121     rstart = rend - nlocal;
2122     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2123 
2124     /* next, compute all the lengths */
2125     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2126     olens = dlens + m;
2127     for (i=0; i<m; i++) {
2128       jend = ii[i+1] - ii[i];
2129       olen = 0;
2130       dlen = 0;
2131       for (j=0; j<jend; j++) {
2132         if (*jj < rstart || *jj >= rend) olen++;
2133         else dlen++;
2134         jj++;
2135       }
2136       olens[i] = olen;
2137       dlens[i] = dlen;
2138     }
2139     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2140     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2141     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2142     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2143     ierr = PetscFree(dlens);CHKERRQ(ierr);
2144   } else {
2145     PetscInt ml,nl;
2146 
2147     M = *newmat;
2148     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2149     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2150     ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
2155     M->was_assembled = PETSC_TRUE;
2156     M->assembled     = PETSC_FALSE;
2157   }
2158   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2159   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2160   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2161   ii  = aij->i;
2162   jj  = aij->j;
2163   aa  = aij->a;
2164   for (i=0; i<m; i++) {
2165     row   = rstart/bs + i;
2166     nz    = ii[i+1] - ii[i];
2167     cwork = jj;     jj += nz;
2168     vwork = aa;     aa += nz;
2169     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2170   }
2171 
2172   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2173   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2174   *newmat = M;
2175 
2176   /* save submatrix used in processor for next request */
2177   if (call ==  MAT_INITIAL_MATRIX) {
2178     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2179     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2180   }
2181 
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatPermute_MPIBAIJ"
2187 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2188 {
2189   MPI_Comm       comm,pcomm;
2190   PetscInt       first,local_size,nrows;
2191   const PetscInt *rows;
2192   PetscMPIInt    size;
2193   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2194   PetscErrorCode ierr;
2195 
2196   PetscFunctionBegin;
2197   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2198   /* make a collective version of 'rowp' */
2199   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2200   if (pcomm==comm) {
2201     crowp = rowp;
2202   } else {
2203     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2204     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2205     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2206     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2207   }
2208   /* collect the global row permutation and invert it */
2209   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2210   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2211   if (pcomm!=comm) {
2212     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2213   }
2214   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2215   /* get the local target indices */
2216   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2217   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2218   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2219   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2220   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2221   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2222   /* the column permutation is so much easier;
2223      make a local version of 'colp' and invert it */
2224   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2225   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2226   if (size==1) {
2227     lcolp = colp;
2228   } else {
2229     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2230     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2231     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2232   }
2233   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2234   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2235   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2236   if (size>1) {
2237     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2238     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2239   }
2240   /* now we just get the submatrix */
2241   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2242   /* clean up */
2243   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2244   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2250 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2251 {
2252   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2253   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2254 
2255   PetscFunctionBegin;
2256   if (nghosts) { *nghosts = B->nbs;}
2257   if (ghosts) {*ghosts = baij->garray;}
2258   PetscFunctionReturn(0);
2259 }
2260 
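/*
   Illustrative usage sketch (not part of the library): the ghost list returned
   above is the block column map garray, so nghost counts blocks, not points.

     PetscInt       nghost;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);
*/
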
2261 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
*/
2268 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2269 {
2270   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2271   PetscErrorCode        ierr;
2272   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2273   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2274   const PetscInt        *is;
2275   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2276   PetscInt              *rowhit,M,cstart,cend,colb;
2277   PetscInt              *columnsforrow,l;
2278   IS                    *isa;
2279   PetscBool              done,flg;
2280   ISLocalToGlobalMapping map = mat->cbmapping;
  PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2282 
2283   PetscFunctionBegin;
2284   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2285   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2286 
2287   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2288   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2289   M                = mat->rmap->n/bs;
2290   cstart           = mat->cmap->rstart/bs;
2291   cend             = mat->cmap->rend/bs;
2292   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2293   c->N             = mat->cmap->N/bs;
2294   c->m             = mat->rmap->n/bs;
2295   c->rstart        = mat->rmap->rstart/bs;
2296 
2297   c->ncolors       = nis;
2298   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2299   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2300   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2301   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2302   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2303   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2304 
2305   /* Allow access to data structures of local part of matrix */
2306   if (!baij->colmap) {
2307     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2308   }
2309   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2310   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2311 
2312   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2313   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2314 
2315   for (i=0; i<nis; i++) {
2316     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2317     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2318     c->ncolumns[i] = n;
2319     if (n) {
2320       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2321       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2322       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2323     } else {
2324       c->columns[i]  = 0;
2325     }
2326 
2327     if (ctype == IS_COLORING_GLOBAL){
2328       /* Determine the total (parallel) number of columns of this color */
2329       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2330       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2331 
2332       nn   = PetscMPIIntCast(n);
2333       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2334       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2335       if (!nctot) {
2336         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2337       }
2338 
2339       disp[0] = 0;
2340       for (j=1; j<size; j++) {
2341         disp[j] = disp[j-1] + ncolsonproc[j-1];
2342       }
2343 
2344       /* Get complete list of columns for color on each processor */
2345       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2346       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2347       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2348     } else if (ctype == IS_COLORING_GHOSTED){
2349       /* Determine local number of columns of this color on this process, including ghost points */
2350       nctot = n;
2351       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2352       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2353     } else {
2354       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2355     }
2356 
2357     /*
       Mark all rows affected by these columns
2359     */
2360     /* Temporary option to allow for debugging/testing */
2361     flg  = PETSC_FALSE;
2362     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2363     if (!flg) {/*-----------------------------------------------------------------------------*/
2364       /* crude, fast version */
2365       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns */
2367       for (j=0; j<nctot; j++) {
2368         if (ctype == IS_COLORING_GHOSTED) {
2369           col = ltog[cols[j]];
2370         } else {
2371           col  = cols[j];
2372         }
2373         if (col >= cstart && col < cend) {
2374           /* column is in diagonal block of matrix */
2375           rows = A_cj + A_ci[col-cstart];
2376           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2377         } else {
2378 #if defined (PETSC_USE_CTABLE)
2379           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
2381 #else
2382           colb = baij->colmap[col] - 1;
2383 #endif
2384           if (colb == -1) {
2385             m = 0;
2386           } else {
2387             colb = colb/bs;
2388             rows = B_cj + B_ci[colb];
2389             m    = B_ci[colb+1] - B_ci[colb];
2390           }
2391         }
        /* loop over the rows of this column, marking them in rowhit */
2393         for (k=0; k<m; k++) {
2394           rowhit[*rows++] = col + 1;
2395         }
2396       }
2397 
2398       /* count the number of hits */
2399       nrows = 0;
2400       for (j=0; j<M; j++) {
2401         if (rowhit[j]) nrows++;
2402       }
2403       c->nrows[i]         = nrows;
2404       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2405       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2406       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2407       nrows = 0;
2408       for (j=0; j<M; j++) {
2409         if (rowhit[j]) {
2410           c->rows[i][nrows]           = j;
2411           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2412           nrows++;
2413         }
2414       }
2415     } else {/*-------------------------------------------------------------------------------*/
2416       /* slow version, using rowhit as a linked list */
2417       PetscInt currentcol,fm,mfm;
2418       rowhit[M] = M;
2419       nrows     = 0;
      /* loop over columns */
2421       for (j=0; j<nctot; j++) {
2422         if (ctype == IS_COLORING_GHOSTED) {
2423           col = ltog[cols[j]];
2424         } else {
2425           col  = cols[j];
2426         }
2427         if (col >= cstart && col < cend) {
2428           /* column is in diagonal block of matrix */
2429           rows = A_cj + A_ci[col-cstart];
2430           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2431         } else {
2432 #if defined (PETSC_USE_CTABLE)
2433           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
2435 #else
2436           colb = baij->colmap[col] - 1;
2437 #endif
2438           if (colb == -1) {
2439             m = 0;
2440           } else {
2441             colb = colb/bs;
2442             rows = B_cj + B_ci[colb];
2443             m    = B_ci[colb+1] - B_ci[colb];
2444           }
2445         }
2446 
        /* loop over the rows of this column, marking them in rowhit */
2448         fm    = M; /* fm points to first entry in linked list */
2449         for (k=0; k<m; k++) {
2450           currentcol = *rows++;
          /* is it already in the list? */
2452           do {
2453             mfm  = fm;
2454             fm   = rowhit[fm];
2455           } while (fm < currentcol);
2456           /* not in list so add it */
2457           if (fm != currentcol) {
2458             nrows++;
2459             columnsforrow[currentcol] = col;
2460             /* next three lines insert new entry into linked list */
2461             rowhit[mfm]               = currentcol;
2462             rowhit[currentcol]        = fm;
2463             fm                        = currentcol;
2464             /* fm points to present position in list since we know the columns are sorted */
2465           } else {
2466             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2467           }
2468         }
2469       }
2470       c->nrows[i]         = nrows;
2471       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2472       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2473       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2474       /* now store the linked list of rows into c->rows[i] */
2475       nrows = 0;
2476       fm    = rowhit[M];
2477       do {
2478         c->rows[i][nrows]            = fm;
2479         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2480         fm                           = rowhit[fm];
2481       } while (fm < M);
2482     } /* ---------------------------------------------------------------------------------------*/
2483     ierr = PetscFree(cols);CHKERRQ(ierr);
2484   }
2485 
2486   /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
  */
2490   if (ctype == IS_COLORING_GLOBAL) {
2491     PetscInt *garray;
2492     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2493     for (i=0; i<baij->B->cmap->n/bs; i++) {
2494       for (j=0; j<bs; j++) {
2495         garray[i*bs+j] = bs*baij->garray[i]+j;
2496       }
2497     }
2498     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2499     ierr = PetscFree(garray);CHKERRQ(ierr);
2500     CHKMEMQ;
2501     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2502     for (k=0; k<c->ncolors; k++) {
2503       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2504       for (l=0; l<c->nrows[k]; l++) {
2505         col = c->columnsforrow[k][l];
2506         if (col >= cstart && col < cend) {
2507           /* column is in diagonal block of matrix */
2508           colb = col - cstart;
2509         } else {
2510           /* column  is in "off-processor" part */
2511 #if defined (PETSC_USE_CTABLE)
2512           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2513           colb --;
2514 #else
2515           colb = baij->colmap[col] - 1;
2516 #endif
2517           colb = colb/bs;
2518           colb += cend - cstart;
2519         }
2520         c->vscaleforrow[k][l] = colb;
2521       }
2522     }
2523   } else if (ctype == IS_COLORING_GHOSTED) {
2524     /* Get gtol mapping */
2525     PetscInt N = mat->cmap->N, *gtol;
2526     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2527     for (i=0; i<N; i++) gtol[i] = -1;
2528     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2529 
2530     c->vscale = 0; /* will be created in MatFDColoringApply() */
2531     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2532     for (k=0; k<c->ncolors; k++) {
2533       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2534       for (l=0; l<c->nrows[k]; l++) {
2535         col = c->columnsforrow[k][l];      /* global column index */
2536         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2537       }
2538     }
2539     ierr = PetscFree(gtol);CHKERRQ(ierr);
2540   }
2541   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2542 
2543   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2544   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2545   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2546   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
  CHKMEMQ;
2548   PetscFunctionReturn(0);
2549 }
2550 
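/*
   Illustrative usage sketch (not part of the library): this routine is normally
   reached through the generic interface when setting up a finite-difference
   Jacobian for an assembled matrix J.

     ISColoring    iscoloring;
     MatFDColoring color;
     ierr = MatGetColoring(J,"sl",&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&color);CHKERRQ(ierr);
     ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
*/
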
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2553 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2554 {
2555   Mat            B;
2556   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2557   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2558   Mat_SeqAIJ     *b;
2559   PetscErrorCode ierr;
2560   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2561   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2562   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2563 
2564   PetscFunctionBegin;
2565   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2566   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2567 
2568   /* ----------------------------------------------------------------
2569      Tell every processor the number of nonzeros per row
2570   */
2571   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2572   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2573     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2574   }
2575   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2576   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2577   displs     = recvcounts + size;
2578   for (i=0; i<size; i++) {
2579     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2580     displs[i]     = A->rmap->range[i]/bs;
2581   }
2582 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2583   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2584 #else
2585   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2586 #endif
2587   /* ---------------------------------------------------------------
2588      Create the sequential matrix of the same type as the local block diagonal
2589   */
2590   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2591   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2592   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2593   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2594   b = (Mat_SeqAIJ *)B->data;
2595 
2596   /*--------------------------------------------------------------------
2597     Copy my part of matrix column indices over
2598   */
2599   sendcount  = ad->nz + bd->nz;
2600   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2601   a_jsendbuf = ad->j;
2602   b_jsendbuf = bd->j;
2603   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2604   cnt        = 0;
2605   for (i=0; i<n; i++) {
2606 
2607     /* put in lower diagonal portion */
2608     m = bd->i[i+1] - bd->i[i];
2609     while (m > 0) {
2610       /* is it above diagonal (in bd (compressed) numbering) */
2611       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2612       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2613       m--;
2614     }
2615 
2616     /* put in diagonal portion */
2617     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2618       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2619     }
2620 
2621     /* put in upper diagonal portion */
2622     while (m-- > 0) {
2623       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2624     }
2625   }
2626   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2627 
2628   /*--------------------------------------------------------------------
2629     Gather all column indices to all processors
2630   */
2631   for (i=0; i<size; i++) {
2632     recvcounts[i] = 0;
2633     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2634       recvcounts[i] += lens[j];
2635     }
2636   }
2637   displs[0]  = 0;
2638   for (i=1; i<size; i++) {
2639     displs[i] = displs[i-1] + recvcounts[i-1];
2640   }
2641 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2642   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2643 #else
2644   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2645 #endif
2646   /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
2648   */
2649   /* set the b->ilen (length of each row) values */
2650   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2651   /* set the b->i indices */
2652   b->i[0] = 0;
2653   for (i=1; i<=A->rmap->N/bs; i++) {
2654     b->i[i] = b->i[i-1] + lens[i-1];
2655   }
2656   ierr = PetscFree(lens);CHKERRQ(ierr);
2657   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2658   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2659   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2660 
2661   if (A->symmetric){
2662     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2663   } else if (A->hermitian) {
2664     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2665   } else if (A->structurally_symmetric) {
2666     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2667   }
2668   *newmat = B;
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 #undef __FUNCT__
2673 #define __FUNCT__ "MatSOR_MPIBAIJ"
2674 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2675 {
2676   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2677   PetscErrorCode ierr;
2678   Vec            bb1 = 0;
2679 
2680   PetscFunctionBegin;
2681   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2682     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2683   }
2684 
2685   if (flag == SOR_APPLY_UPPER) {
2686     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2687     PetscFunctionReturn(0);
2688   }
2689 
2690   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2691     if (flag & SOR_ZERO_INITIAL_GUESS) {
2692       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2693       its--;
2694     }
2695 
2696     while (its--) {
2697       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2698       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2699 
2700       /* update rhs: bb1 = bb - B*x */
2701       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2702       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2703 
2704       /* local sweep */
2705       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2706     }
2707   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2708     if (flag & SOR_ZERO_INITIAL_GUESS) {
2709       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2710       its--;
2711     }
2712     while (its--) {
2713       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2715 
2716       /* update rhs: bb1 = bb - B*x */
2717       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2718       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2719 
2720       /* local sweep */
2721       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2722     }
2723   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2724     if (flag & SOR_ZERO_INITIAL_GUESS) {
2725       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2726       its--;
2727     }
2728     while (its--) {
2729       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2730       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2731 
2732       /* update rhs: bb1 = bb - B*x */
2733       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2734       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2735 
2736       /* local sweep */
2737       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2738     }
2739   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2740 
2741   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2746 
2747 
2748 /* -------------------------------------------------------------------*/
2749 static struct _MatOps MatOps_Values = {
2750        MatSetValues_MPIBAIJ,
2751        MatGetRow_MPIBAIJ,
2752        MatRestoreRow_MPIBAIJ,
2753        MatMult_MPIBAIJ,
2754 /* 4*/ MatMultAdd_MPIBAIJ,
2755        MatMultTranspose_MPIBAIJ,
2756        MatMultTransposeAdd_MPIBAIJ,
2757        0,
2758        0,
2759        0,
2760 /*10*/ 0,
2761        0,
2762        0,
2763        MatSOR_MPIBAIJ,
2764        MatTranspose_MPIBAIJ,
2765 /*15*/ MatGetInfo_MPIBAIJ,
2766        MatEqual_MPIBAIJ,
2767        MatGetDiagonal_MPIBAIJ,
2768        MatDiagonalScale_MPIBAIJ,
2769        MatNorm_MPIBAIJ,
2770 /*20*/ MatAssemblyBegin_MPIBAIJ,
2771        MatAssemblyEnd_MPIBAIJ,
2772        MatSetOption_MPIBAIJ,
2773        MatZeroEntries_MPIBAIJ,
2774 /*24*/ MatZeroRows_MPIBAIJ,
2775        0,
2776        0,
2777        0,
2778        0,
2779 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2780        0,
2781        0,
2782        0,
2783        0,
2784 /*34*/ MatDuplicate_MPIBAIJ,
2785        0,
2786        0,
2787        0,
2788        0,
2789 /*39*/ MatAXPY_MPIBAIJ,
2790        MatGetSubMatrices_MPIBAIJ,
2791        MatIncreaseOverlap_MPIBAIJ,
2792        MatGetValues_MPIBAIJ,
2793        MatCopy_MPIBAIJ,
2794 /*44*/ 0,
2795        MatScale_MPIBAIJ,
2796        0,
2797        0,
2798        0,
2799 /*49*/ MatSetBlockSize_MPIBAIJ,
2800        0,
2801        0,
2802        0,
2803        0,
2804 /*54*/ MatFDColoringCreate_MPIBAIJ,
2805        0,
2806        MatSetUnfactored_MPIBAIJ,
2807        MatPermute_MPIBAIJ,
2808        MatSetValuesBlocked_MPIBAIJ,
2809 /*59*/ MatGetSubMatrix_MPIBAIJ,
2810        MatDestroy_MPIBAIJ,
2811        MatView_MPIBAIJ,
2812        0,
2813        0,
2814 /*64*/ 0,
2815        0,
2816        0,
2817        0,
2818        0,
2819 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2820        0,
2821        0,
2822        0,
2823        0,
2824 /*74*/ 0,
2825        MatFDColoringApply_BAIJ,
2826        0,
2827        0,
2828        0,
2829 /*79*/ 0,
2830        0,
2831        0,
2832        0,
2833        MatLoad_MPIBAIJ,
2834 /*84*/ 0,
2835        0,
2836        0,
2837        0,
2838        0,
2839 /*89*/ 0,
2840        0,
2841        0,
2842        0,
2843        0,
2844 /*94*/ 0,
2845        0,
2846        0,
2847        0,
2848        0,
2849 /*99*/ 0,
2850        0,
2851        0,
2852        0,
2853        0,
2854 /*104*/0,
2855        MatRealPart_MPIBAIJ,
2856        MatImaginaryPart_MPIBAIJ,
2857        0,
2858        0,
2859 /*109*/0,
2860        0,
2861        0,
2862        0,
2863        0,
2864 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2865        0,
2866        MatGetGhosts_MPIBAIJ,
2867        0,
2868        0,
2869 /*119*/0,
2870        0,
2871        0,
2872        0
2873 };
2874 
2875 EXTERN_C_BEGIN
2876 #undef __FUNCT__
2877 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2878 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2879 {
2880   PetscFunctionBegin;
2881   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2882   *iscopy = PETSC_FALSE;
2883   PetscFunctionReturn(0);
2884 }
2885 EXTERN_C_END
2886 
2887 EXTERN_C_BEGIN
2888 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2889 EXTERN_C_END
2890 
2891 EXTERN_C_BEGIN
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2894 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2895 {
2896   PetscInt       m,rstart,cstart,cend;
2897   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2898   const PetscInt *JJ=0;
2899   PetscScalar    *values=0;
2900   PetscErrorCode ierr;
2901 
2902   PetscFunctionBegin;
2903 
2904   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2905   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2906   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2907   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2909   m      = B->rmap->n/bs;
2910   rstart = B->rmap->rstart/bs;
2911   cstart = B->cmap->rstart/bs;
2912   cend   = B->cmap->rend/bs;
2913 
2914   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2915   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
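  /*
     Count, for each block row, how many of the (sorted) block column indices
     fall inside the diagonal ownership range [cstart,cend) versus outside it;
     the two counts become d_nnz[] and o_nnz[] for the preallocation below.
  */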
2916   for (i=0; i<m; i++) {
2917     nz = ii[i+1] - ii[i];
2918     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2919     nz_max = PetscMax(nz_max,nz);
2920     JJ  = jj + ii[i];
2921     for (j=0; j<nz; j++) {
2922       if (*JJ >= cstart) break;
2923       JJ++;
2924     }
2925     d = 0;
2926     for (; j<nz; j++) {
2927       if (*JJ++ >= cend) break;
2928       d++;
2929     }
2930     d_nnz[i] = d;
2931     o_nnz[i] = nz - d;
2932   }
2933   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2934   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2935 
2936   values = (PetscScalar*)V;
2937   if (!values) {
2938     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2939     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2940   }
2941   for (i=0; i<m; i++) {
2942     PetscInt          row    = i + rstart;
2943     PetscInt          ncols  = ii[i+1] - ii[i];
2944     const PetscInt    *icols = jj + ii[i];
2945     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2946     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2947   }
2948 
2949   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2950   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2951   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952 
2953   PetscFunctionReturn(0);
2954 }
2955 EXTERN_C_END
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2959 /*@C
2960    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2961    (block compressed row).
2962 
2963    Collective on MPI_Comm
2964 
2965    Input Parameters:
2966 +  B - the matrix
2967 .  bs - the block size
2968 .  i - the indices into j for the start of each local row (starts with zero)
2969 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2970 -  v - optional values in the matrix
2971 
2972    Level: developer
2973 
2974 .keywords: matrix, aij, compressed row, sparse, parallel
2975 
2976 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2977 @*/
2978 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2979 {
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2984   PetscFunctionReturn(0);
2985 }
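/*
   A minimal usage sketch (illustrative only, not part of this source): with
   bs = 2, i[] holds the block row pointers, j[] the sorted global block
   column indices, and v[] one bs*bs block of values per entry of j[]; comm,
   ierr, and the contents of v[] are assumed from the caller's context.

     Mat         A;
     PetscInt    i[] = {0,2,3},j[] = {0,1,1};
     PetscScalar v[3*2*2];
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,2,i,j,v);CHKERRQ(ierr);
*/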
2986 
2987 EXTERN_C_BEGIN
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2990 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2991 {
2992   Mat_MPIBAIJ    *b;
2993   PetscErrorCode ierr;
2994   PetscInt       i, newbs = PetscAbs(bs);
2995 
2996   PetscFunctionBegin;
2997   if (bs < 0) {
2998     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2999       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3000     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3001     bs   = PetscAbs(bs);
3002   }
3003   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3004   bs = newbs;
3005 
3006 
3007   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3008   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3009   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3010   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3011   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3012 
3013   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3014   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3015   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3016   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3017 
3018   if (d_nnz) {
3019     for (i=0; i<B->rmap->n/bs; i++) {
3020       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3021     }
3022   }
3023   if (o_nnz) {
3024     for (i=0; i<B->rmap->n/bs; i++) {
3025       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3026     }
3027   }
3028 
3029   b = (Mat_MPIBAIJ*)B->data;
3030   b->bs2 = bs*bs;
3031   b->mbs = B->rmap->n/bs;
3032   b->nbs = B->cmap->n/bs;
3033   b->Mbs = B->rmap->N/bs;
3034   b->Nbs = B->cmap->N/bs;
3035 
3036   for (i=0; i<=b->size; i++) {
3037     b->rangebs[i] = B->rmap->range[i]/bs;
3038   }
3039   b->rstartbs = B->rmap->rstart/bs;
3040   b->rendbs   = B->rmap->rend/bs;
3041   b->cstartbs = B->cmap->rstart/bs;
3042   b->cendbs   = B->cmap->rend/bs;
3043 
3044   if (!B->preallocated) {
3045     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3046     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3047     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3048     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3049     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3050     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3051     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3052     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3053     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3054   }
3055 
3056   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3057   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3058   B->preallocated = PETSC_TRUE;
3059   PetscFunctionReturn(0);
3060 }
3061 EXTERN_C_END
3062 
3063 EXTERN_C_BEGIN
3064 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3065 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3066 EXTERN_C_END
3067 
3068 
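/*
    Converts the block nonzero structure of an MPIBAIJ matrix into an MPIAdj
    adjacency graph; the diagonal block entry of each row is dropped, since
    adjacency graphs (e.g. those fed to partitioners) do not carry self edges.
*/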
3069 EXTERN_C_BEGIN
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3072 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3073 {
3074   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3075   PetscErrorCode ierr;
3076   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3077   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3078   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3079 
3080   PetscFunctionBegin;
3081   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3082   ii[0] = 0;
3083   CHKMEMQ;
3084   for (i=0; i<M; i++) {
3085     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3086     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3087     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3088     /* remove one from the count if this row contains its diagonal entry */
3089     for (j=id[i]; j<id[i+1]; j++) {
3090       if (jd[j] == i) {ii[i+1]--;break;}
3091     }
3092   CHKMEMQ;
3093   }
3094   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3095   cnt = 0;
3096   for (i=0; i<M; i++) {
3097     for (j=io[i]; j<io[i+1]; j++) {
3098       if (garray[jo[j]] > rstart) break;
3099       jj[cnt++] = garray[jo[j]];
3100   CHKMEMQ;
3101     }
3102     for (k=id[i]; k<id[i+1]; k++) {
3103       if (jd[k] != i) {
3104         jj[cnt++] = rstart + jd[k];
3105   CHKMEMQ;
3106       }
3107     }
3108     for (;j<io[i+1]; j++) {
3109       jj[cnt++] = garray[jo[j]];
3110   CHKMEMQ;
3111     }
3112   }
3113   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3114   PetscFunctionReturn(0);
3115 }
3116 EXTERN_C_END
3117 
3118 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3119 EXTERN_C_BEGIN
3120 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3121 EXTERN_C_END
3122 
3123 EXTERN_C_BEGIN
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3126 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3127 {
3128   PetscErrorCode ierr;
3129   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3130   Mat            B;
3131   Mat_MPIAIJ     *b;
3132 
3133   PetscFunctionBegin;
3134   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3135 
3136   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3137   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3138   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3139   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3140   b = (Mat_MPIAIJ*) B->data;
3141 
3142   ierr = MatDestroy(b->A);CHKERRQ(ierr);
3143   ierr = MatDestroy(b->B);CHKERRQ(ierr);
3144   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3145   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3146   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3147   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3148   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3149   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3150   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   if (reuse == MAT_REUSE_MATRIX) {
3152     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3153   } else {
3154    *newmat = B;
3155   }
3156   PetscFunctionReturn(0);
3157 }
3158 EXTERN_C_END
3159 
3160 EXTERN_C_BEGIN
3161 #if defined(PETSC_HAVE_MUMPS)
3162 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3163 #endif
3164 EXTERN_C_END
3165 
3166 /*MC
3167    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3168 
3169    Options Database Keys:
3170 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3171 . -mat_block_size <bs> - set the blocksize used to store the matrix
3172 - -mat_use_hash_table <fact> - use a hash table (of size scaled by <fact>) to save memory when constructing the matrix
3173 
3174   Level: beginner
3175 
3176 .seealso: MatCreateMPIBAIJ()
3177 M*/
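/*
   A typical runtime-selection sketch (illustrative; comm, m, and n are
   assumed from the caller's context), picking the format on the command line
   with e.g. -mat_type mpibaij -mat_block_size 3:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/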
3178 
3179 EXTERN_C_BEGIN
3180 #undef __FUNCT__
3181 #define __FUNCT__ "MatCreate_MPIBAIJ"
3182 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3183 {
3184   Mat_MPIBAIJ    *b;
3185   PetscErrorCode ierr;
3186   PetscBool      flg;
3187 
3188   PetscFunctionBegin;
3189   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3190   B->data = (void*)b;
3191 
3192 
3193   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3194   B->assembled  = PETSC_FALSE;
3195 
3196   B->insertmode = NOT_SET_VALUES;
3197   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3198   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3199 
3200   /* build local table of row and column ownerships */
3201   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3202 
3203   /* build cache for off array entries formed */
3204   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3205   b->donotstash  = PETSC_FALSE;
3206   b->colmap      = PETSC_NULL;
3207   b->garray      = PETSC_NULL;
3208   b->roworiented = PETSC_TRUE;
3209 
3210   /* stuff used in block assembly */
3211   b->barray       = 0;
3212 
3213   /* stuff used for matrix vector multiply */
3214   b->lvec         = 0;
3215   b->Mvctx        = 0;
3216 
3217   /* stuff for MatGetRow() */
3218   b->rowindices   = 0;
3219   b->rowvalues    = 0;
3220   b->getrowactive = PETSC_FALSE;
3221 
3222   /* hash table stuff */
3223   b->ht           = 0;
3224   b->hd           = 0;
3225   b->ht_size      = 0;
3226   b->ht_flag      = PETSC_FALSE;
3227   b->ht_fact      = 0;
3228   b->ht_total_ct  = 0;
3229   b->ht_insert_ct = 0;
3230 
3231   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3232     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3233     if (flg) {
3234       PetscReal fact = 1.39;
3235       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3236       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3237       if (fact <= 1.0) fact = 1.39;
3238       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3239       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3240     }
3241   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3242 
3243 #if defined(PETSC_HAVE_MUMPS)
3244   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3245 #endif
3246   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3247                                      "MatConvert_MPIBAIJ_MPIAdj",
3248                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3250                                      "MatConvert_MPIBAIJ_MPIAIJ",
3251                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3252   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3253                                      "MatStoreValues_MPIBAIJ",
3254                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3255   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3256                                      "MatRetrieveValues_MPIBAIJ",
3257                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3258   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3259                                      "MatGetDiagonalBlock_MPIBAIJ",
3260                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3261   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3262                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3263                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3265 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3266 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3268                                      "MatDiagonalScaleLocal_MPIBAIJ",
3269                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3271                                      "MatSetHashTableFactor_MPIBAIJ",
3272                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3274   PetscFunctionReturn(0);
3275 }
3276 EXTERN_C_END
3277 
3278 /*MC
3279    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3280 
3281    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3282    and MATMPIBAIJ otherwise.
3283 
3284    Options Database Keys:
3285 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3286 
3287   Level: beginner
3288 
3289 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3290 M*/
3291 
3292 EXTERN_C_BEGIN
3293 #undef __FUNCT__
3294 #define __FUNCT__ "MatCreate_BAIJ"
3295 PetscErrorCode  MatCreate_BAIJ(Mat A)
3296 {
3297   PetscErrorCode ierr;
3298   PetscMPIInt    size;
3299 
3300   PetscFunctionBegin;
3301   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3302   if (size == 1) {
3303     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3304   } else {
3305     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3306   }
3307   PetscFunctionReturn(0);
3308 }
3309 EXTERN_C_END
3310 
3311 #undef __FUNCT__
3312 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3313 /*@C
3314    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3315    (block compressed row).  For good matrix assembly performance
3316    the user should preallocate the matrix storage by setting the parameters
3317    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3318    performance can be increased by more than a factor of 50.
3319 
3320    Collective on Mat
3321 
3322    Input Parameters:
3323 +  A - the matrix
3324 .  bs   - size of block
3325 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3326            submatrix  (same for all local rows)
3327 .  d_nnz - array containing the number of block nonzeros in the various block rows
3328            in the diagonal portion of the local submatrix (possibly different for each block
3329            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3330 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3331            submatrix (same for all local rows).
3332 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3333            off-diagonal portion of the local submatrix (possibly different for
3334            each block row) or PETSC_NULL.
3335 
3336    If the *_nnz parameter is given then the *_nz parameter is ignored
3337 
3338    Options Database Keys:
3339 +   -mat_block_size - size of the blocks to use
3340 -   -mat_use_hash_table <fact> - use a hash table (of size scaled by <fact>) to save memory when constructing the matrix
3341 
3342    Notes:
3343    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3344    then it must be used on all processors that share the object for that argument.
3345 
3346    Storage Information:
3347    For a square global matrix we define each processor's diagonal portion
3348    to be its local rows and the corresponding columns (a square submatrix);
3349    each processor's off-diagonal portion encompasses the remainder of the
3350    local matrix (a rectangular submatrix).
3351 
3352    The user can specify preallocated storage for the diagonal part of
3353    the local submatrix with either d_nz or d_nnz (not both).  Set
3354    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3355    memory allocation.  Likewise, specify preallocated storage for the
3356    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3357 
3358    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3359    the figure below we depict these three local rows and all columns (0-11).
3360 
3361 .vb
3362            0 1 2 3 4 5 6 7 8 9 10 11
3363           -------------------
3364    row 3  |  o o o d d d o o o o o o
3365    row 4  |  o o o d d d o o o o o o
3366    row 5  |  o o o d d d o o o o o o
3367           -------------------
3368 .ve
3369 
3370    Thus, any entries in the d locations are stored in the d (diagonal)
3371    submatrix, and any entries in the o locations are stored in the
3372    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3373    stored simply in the MATSEQBAIJ format for compressed row storage.
3374 
3375    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3376    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3377    In general, for PDE problems in which most nonzeros are near the diagonal,
3378    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3379    or you will get TERRIBLE performance; see the users' manual chapter on
3380    matrices.
3381 
3382    You can call MatGetInfo() to get information on how effective the preallocation was;
3383    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3384    You can also run with the option -info and look for messages with the string
3385    malloc in them to see if additional memory allocation was needed.
3386 
3387    Level: intermediate
3388 
3389 .keywords: matrix, block, aij, compressed row, sparse, parallel
3390 
3391 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3392 @*/
3393 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3394 {
3395   PetscErrorCode ierr;
3396 
3397   PetscFunctionBegin;
3398   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3399   PetscFunctionReturn(0);
3400 }
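/*
   A minimal sketch of the recommended creation sequence (illustrative only;
   comm, bs, m, and n are assumed from the caller's context):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   With d_nnz = o_nnz = PETSC_NULL the scalar counts d_nz = 5 and o_nz = 2
   apply to every block row.
*/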
3401 
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatCreateMPIBAIJ"
3404 /*@C
3405    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3406    (block compressed row).  For good matrix assembly performance
3407    the user should preallocate the matrix storage by setting the parameters
3408    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3409    performance can be increased by more than a factor of 50.
3410 
3411    Collective on MPI_Comm
3412 
3413    Input Parameters:
3414 +  comm - MPI communicator
3415 .  bs   - size of block
3416 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3417            This value should be the same as the local size used in creating the
3418            y vector for the matrix-vector product y = Ax.
3419 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3420            This value should be the same as the local size used in creating the
3421            x vector for the matrix-vector product y = Ax.
3422 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3423 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3424 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3425            submatrix  (same for all local rows)
3426 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3427            in the diagonal portion of the local submatrix (possibly different for each block
3428            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3429 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3430            submatrix (same for all local rows).
3431 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3432            off-diagonal portion of the local submatrix (possibly different for
3433            each block row) or PETSC_NULL.
3434 
3435    Output Parameter:
3436 .  A - the matrix
3437 
3438    Options Database Keys:
3439 +   -mat_block_size - size of the blocks to use
3440 -   -mat_use_hash_table <fact> - use a hash table (of size scaled by <fact>) to save memory when constructing the matrix
3441 
3442    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3443    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3444    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3445 
3446    Notes:
3447    If the *_nnz parameter is given then the *_nz parameter is ignored
3448 
3449    A nonzero block is any block that has 1 or more nonzeros in it
3450 
3451    The user MUST specify either the local or global matrix dimensions
3452    (possibly both).
3453 
3454    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3455    then it must be used on all processors that share the object for that argument.
3456 
3457    Storage Information:
3458    For a square global matrix we define each processor's diagonal portion
3459    to be its local rows and the corresponding columns (a square submatrix);
3460    each processor's off-diagonal portion encompasses the remainder of the
3461    local matrix (a rectangular submatrix).
3462 
3463    The user can specify preallocated storage for the diagonal part of
3464    the local submatrix with either d_nz or d_nnz (not both).  Set
3465    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3466    memory allocation.  Likewise, specify preallocated storage for the
3467    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3468 
3469    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3470    the figure below we depict these three local rows and all columns (0-11).
3471 
3472 .vb
3473            0 1 2 3 4 5 6 7 8 9 10 11
3474           -------------------
3475    row 3  |  o o o d d d o o o o o o
3476    row 4  |  o o o d d d o o o o o o
3477    row 5  |  o o o d d d o o o o o o
3478           -------------------
3479 .ve
3480 
3481    Thus, any entries in the d locations are stored in the d (diagonal)
3482    submatrix, and any entries in the o locations are stored in the
3483    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3484    stored simply in the MATSEQBAIJ format for compressed row storage.
3485 
3486    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3487    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3488    In general, for PDE problems in which most nonzeros are near the diagonal,
3489    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3490    or you will get TERRIBLE performance; see the users' manual chapter on
3491    matrices.
3492 
3493    Level: intermediate
3494 
3495 .keywords: matrix, block, aij, compressed row, sparse, parallel
3496 
3497 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3498 @*/
3499 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3500 {
3501   PetscErrorCode ierr;
3502   PetscMPIInt    size;
3503 
3504   PetscFunctionBegin;
3505   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3506   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3507   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3508   if (size > 1) {
3509     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3510     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3511   } else {
3512     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3513     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3514   }
3515   PetscFunctionReturn(0);
3516 }
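/*
   An illustrative call (all sizes are placeholders): a square matrix with
   3x3 blocks, 10 block rows per process, and roughly 5 diagonal and 2
   off-diagonal nonzero blocks preallocated per block row:

     Mat A;
     ierr = MatCreateMPIBAIJ(comm,3,10*3,10*3,PETSC_DETERMINE,PETSC_DETERMINE,
                             5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/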
3517 
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3520 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3521 {
3522   Mat            mat;
3523   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3524   PetscErrorCode ierr;
3525   PetscInt       len=0;
3526 
3527   PetscFunctionBegin;
3528   *newmat       = 0;
3529   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3530   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3531   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3532   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3533 
3534   mat->factortype   = matin->factortype;
3535   mat->preallocated = PETSC_TRUE;
3536   mat->assembled    = PETSC_TRUE;
3537   mat->insertmode   = NOT_SET_VALUES;
3538 
3539   a      = (Mat_MPIBAIJ*)mat->data;
3540   mat->rmap->bs  = matin->rmap->bs;
3541   a->bs2   = oldmat->bs2;
3542   a->mbs   = oldmat->mbs;
3543   a->nbs   = oldmat->nbs;
3544   a->Mbs   = oldmat->Mbs;
3545   a->Nbs   = oldmat->Nbs;
3546 
3547   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3548   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3549 
3550   a->size         = oldmat->size;
3551   a->rank         = oldmat->rank;
3552   a->donotstash   = oldmat->donotstash;
3553   a->roworiented  = oldmat->roworiented;
3554   a->rowindices   = 0;
3555   a->rowvalues    = 0;
3556   a->getrowactive = PETSC_FALSE;
3557   a->barray       = 0;
3558   a->rstartbs     = oldmat->rstartbs;
3559   a->rendbs       = oldmat->rendbs;
3560   a->cstartbs     = oldmat->cstartbs;
3561   a->cendbs       = oldmat->cendbs;
3562 
3563   /* hash table stuff */
3564   a->ht           = 0;
3565   a->hd           = 0;
3566   a->ht_size      = 0;
3567   a->ht_flag      = oldmat->ht_flag;
3568   a->ht_fact      = oldmat->ht_fact;
3569   a->ht_total_ct  = 0;
3570   a->ht_insert_ct = 0;
3571 
3572   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3573   if (oldmat->colmap) {
3574 #if defined (PETSC_USE_CTABLE)
3575   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3576 #else
3577   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3578   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3579   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3580 #endif
3581   } else a->colmap = 0;
3582 
3583   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3584     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3585     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3586     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3587   } else a->garray = 0;
3588 
3589   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3590   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3591   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3592   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3593   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3594 
3595   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3596   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3597   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3598   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3599   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3600   *newmat = mat;
3601 
3602   PetscFunctionReturn(0);
3603 }
3604 
3605 #undef __FUNCT__
3606 #define __FUNCT__ "MatLoad_MPIBAIJ"
3607 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3608 {
3609   PetscErrorCode ierr;
3610   int            fd;
3611   PetscInt       i,nz,j,rstart,rend;
3612   PetscScalar    *vals,*buf;
3613   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3614   MPI_Status     status;
3615   PetscMPIInt    rank,size,maxnz;
3616   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3617   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3618   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3619   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3620   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3621   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3622 
3623   PetscFunctionBegin;
3624   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3625     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3626   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3627 
3628   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3629   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3630   if (!rank) {
3631     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3632     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3633     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3634   }
3635 
3636   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3637 
3638   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3639   M = header[1]; N = header[2];
3640 
3641   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3642   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3643   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3644 
3645   /* If global sizes are set, check if they are consistent with that given in the file */
3646   if (sizesset) {
3647     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3648   }
3649   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
3650   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
3651 
3652   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3653 
3654   /*
3655      This code adds extra rows to make sure the number of rows is
3656      divisible by the blocksize
3657   */
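  /* e.g. M = 10, bs = 3: Mbs = 3 and extra_rows = 3 - 10 + 9 = 2, so Mbs
     becomes 4 and two padded rows (each with a unit diagonal entry) are added */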
3658   Mbs        = M/bs;
3659   extra_rows = bs - M + bs*Mbs;
3660   if (extra_rows == bs) extra_rows = 0;
3661   else                  Mbs++;
3662   if (extra_rows && !rank) {
3663     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3664   }
3665 
3666   /* determine ownership of all rows */
3667   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3668     mbs        = Mbs/size + ((Mbs % size) > rank);
3669     m          = mbs*bs;
3670   } else { /* User set */
3671     m          = newmat->rmap->n;
3672     mbs        = m/bs;
3673   }
3674   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3675   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3676 
3677   /* process 0 needs enough room for process with most rows */
3678   if (!rank) {
3679     mmax = rowners[1];
3680     for (i=2; i<size; i++) {
3681       mmax = PetscMax(mmax,rowners[i]);
3682     }
3683     mmax*=bs;
3684   } else mmax = m;
3685 
3686   rowners[0] = 0;
3687   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3688   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3689   rstart = rowners[rank];
3690   rend   = rowners[rank+1];
3691 
3692   /* distribute row lengths to all processors */
3693   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3694   if (!rank) {
3695     mend = m;
3696     if (size == 1) mend = mend - extra_rows;
3697     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3698     for (j=mend; j<m; j++) locrowlens[j] = 1;
3699     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3700     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3701     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3702     for (j=0; j<m; j++) {
3703       procsnz[0] += locrowlens[j];
3704     }
3705     for (i=1; i<size; i++) {
3706       mend = browners[i+1] - browners[i];
3707       if (i == size-1) mend = mend - extra_rows;
3708       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3709       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3710       /* calculate the number of nonzeros on each processor */
3711       for (j=0; j<browners[i+1]-browners[i]; j++) {
3712         procsnz[i] += rowlengths[j];
3713       }
3714       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3715     }
3716     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3717   } else {
3718     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3719   }
3720 
3721   if (!rank) {
3722     /* determine max buffer needed and allocate it */
3723     maxnz = procsnz[0];
3724     for (i=1; i<size; i++) {
3725       maxnz = PetscMax(maxnz,procsnz[i]);
3726     }
3727     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3728 
3729     /* read in my part of the matrix column indices  */
3730     nz     = procsnz[0];
3731     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3732     mycols = ibuf;
3733     if (size == 1)  nz -= extra_rows;
3734     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3735     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3736 
3737     /* read in everyone else's rows (except the last process's) and ship off */
3738     for (i=1; i<size-1; i++) {
3739       nz   = procsnz[i];
3740       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3741       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3742     }
3743     /* read in the stuff for the last proc */
3744     if (size != 1) {
3745       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3746       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3747       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3748       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3749     }
3750     ierr = PetscFree(cols);CHKERRQ(ierr);
3751   } else {
3752     /* determine buffer space needed for message */
3753     nz = 0;
3754     for (i=0; i<m; i++) {
3755       nz += locrowlens[i];
3756     }
3757     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3758     mycols = ibuf;
3759     /* receive message of column indices */
3760     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3761     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3762     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3763   }
3764 
3765   /* loop over local rows, determining number of off diagonal entries */
3766   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3767   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3768   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3769   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3770   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
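  /* mask[] flags the block columns already seen in the current block row;
     masked1[]/masked2[] record which flags to clear afterwards, split into
     diagonal ([rstart,rend)) and off-diagonal block columns respectively */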
3771   rowcount = 0; nzcount = 0;
3772   for (i=0; i<mbs; i++) {
3773     dcount  = 0;
3774     odcount = 0;
3775     for (j=0; j<bs; j++) {
3776       kmax = locrowlens[rowcount];
3777       for (k=0; k<kmax; k++) {
3778         tmp = mycols[nzcount++]/bs;
3779         if (!mask[tmp]) {
3780           mask[tmp] = 1;
3781           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3782           else masked1[dcount++] = tmp;
3783         }
3784       }
3785       rowcount++;
3786     }
3787 
3788     dlens[i]  = dcount;
3789     odlens[i] = odcount;
3790 
3791     /* zero out the mask elements we set */
3792     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3793     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3794   }
3795 
3796 
3797   if (!sizesset) {
3798     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3799   }
3800   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3801 
3802   if (!rank) {
3803     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3804     /* read in my part of the matrix numerical values  */
3805     nz = procsnz[0];
3806     vals = buf;
3807     mycols = ibuf;
3808     if (size == 1)  nz -= extra_rows;
3809     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3810     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3811 
3812     /* insert into matrix */
3813     jj      = rstart*bs;
3814     for (i=0; i<m; i++) {
3815       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3816       mycols += locrowlens[i];
3817       vals   += locrowlens[i];
3818       jj++;
3819     }
3820     /* read in other processors (except the last one) and ship out */
3821     for (i=1; i<size-1; i++) {
3822       nz   = procsnz[i];
3823       vals = buf;
3824       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3825       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3826     }
3827     /* the last proc */
3828     if (size != 1){
3829       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3830       vals = buf;
3831       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3832       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3833       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3834     }
3835     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3836   } else {
3837     /* receive numeric values */
3838     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3839 
3840     /* receive message of values */
3841     vals   = buf;
3842     mycols = ibuf;
3843     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3844     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3845     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3846 
3847     /* insert into matrix */
3848     jj      = rstart*bs;
3849     for (i=0; i<m; i++) {
3850       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3851       mycols += locrowlens[i];
3852       vals   += locrowlens[i];
3853       jj++;
3854     }
3855   }
3856   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3857   ierr = PetscFree(buf);CHKERRQ(ierr);
3858   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3859   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3860   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3861   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3862   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3863   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864 
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 #undef __FUNCT__
3869 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3870 /*@
3871    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3872 
3873    Input Parameters:
3874 +  mat  - the matrix
3875 -  fact - factor
3876 
3877    Not Collective, each process can use a different factor
3878 
3879    Level: advanced
3880 
3881   Notes:
3882    This can also be set by the command line option: -mat_use_hash_table <fact>
3883 
3884 .keywords: matrix, hashtable, factor, HT
3885 
3886 .seealso: MatSetOption()
3887 @*/
3888 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
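/*
   Illustrative use (A is assumed to exist; 1.6 is an arbitrary factor, and
   the command-line path in MatCreate_MPIBAIJ() above falls back to 1.39 when
   the supplied factor is <= 1.0):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/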
3896 
3897 EXTERN_C_BEGIN
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3900 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3901 {
3902   Mat_MPIBAIJ *baij;
3903 
3904   PetscFunctionBegin;
3905   baij = (Mat_MPIBAIJ*)mat->data;
3906   baij->ht_fact = fact;
3907   PetscFunctionReturn(0);
3908 }
3909 EXTERN_C_END
3910 
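/*
    Gives direct access to the two sequential pieces of the parallel matrix:
    Ad is the diagonal block, Ao the off-diagonal block, and colmap is the
    garray mapping Ao's compressed block columns back to global block columns.
*/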
3911 #undef __FUNCT__
3912 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3913 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3914 {
3915   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3916   PetscFunctionBegin;
3917   *Ad     = a->A;
3918   *Ao     = a->B;
3919   *colmap = a->garray;
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 /*
3924     Special version for direct calls from Fortran (to eliminate two function call overheads)
3925 */
3926 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3927 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3928 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3929 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3930 #endif
3931 
3932 #undef __FUNCT__
3933 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3934 /*@C
3935   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3936 
3937   Collective on Mat
3938 
3939   Input Parameters:
3940 + mat - the matrix
3941 . min - number of input rows
3942 . im - input rows
3943 . nin - number of input columns
3944 . in - input columns
3945 . v - numerical values input
3946 - addvin - INSERT_VALUES or ADD_VALUES
3947 
3948   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code is duplicated (rather than reused) only to eliminate the function call overhead.
3949 
3950   Level: advanced
3951 
3952 .seealso:   MatSetValuesBlocked()
3953 @*/
3954 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3955 {
3956   /* convert input arguments to C version */
3957   Mat             mat = *matin;
3958   PetscInt        m = *min, n = *nin;
3959   InsertMode      addv = *addvin;
3960 
3961   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3962   const MatScalar *value;
3963   MatScalar       *barray=baij->barray;
3964   PetscBool       roworiented = baij->roworiented;
3965   PetscErrorCode  ierr;
3966   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3967   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3968   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3969 
3970   PetscFunctionBegin;
3971   /* tasks normally handled by MatSetValuesBlocked() */
3972   if (mat->insertmode == NOT_SET_VALUES) {
3973     mat->insertmode = addv;
3974   }
3975 #if defined(PETSC_USE_DEBUG)
3976   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3977   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3978 #endif
3979   if (mat->assembled) {
3980     mat->was_assembled = PETSC_TRUE;
3981     mat->assembled     = PETSC_FALSE;
3982   }
3983   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3984 
3985 
3986   if (!barray) {
3987     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3988     baij->barray = barray;
3989   }
3990 
3991   if (roworiented) {
3992     stepval = (n-1)*bs;
3993   } else {
3994     stepval = (m-1)*bs;
3995   }
3996   for (i=0; i<m; i++) {
3997     if (im[i] < 0) continue;
3998 #if defined(PETSC_USE_DEBUG)
3999     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4000 #endif
4001     if (im[i] >= rstart && im[i] < rend) {
4002       row = im[i] - rstart;
4003       for (j=0; j<n; j++) {
4004         /* If NumCol = 1 then a copy is not required */
4005         if ((roworiented) && (n == 1)) {
4006           barray = (MatScalar*)v + i*bs2;
4007         } else if ((!roworiented) && (m == 1)) {
4008           barray = (MatScalar*)v + j*bs2;
4009         } else { /* Here a copy is required */
4010           if (roworiented) {
4011             value = v + i*(stepval+bs)*bs + j*bs;
4012           } else {
4013             value = v + j*(stepval+bs)*bs + i*bs;
4014           }
4015           for (ii=0; ii<bs; ii++,value+=stepval) {
4016             for (jj=0; jj<bs; jj++) {
4017               *barray++  = *value++;
4018             }
4019           }
4020           barray -=bs2;
4021         }
4022 
4023         if (in[j] >= cstart && in[j] < cend){
4024           col  = in[j] - cstart;
4025           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4026         }
4027         else if (in[j] < 0) continue;
4028 #if defined(PETSC_USE_DEBUG)
4029         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4030 #endif
4031         else {
4032           if (mat->was_assembled) {
4033             if (!baij->colmap) {
4034               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4035             }
4036 
4037 #if defined(PETSC_USE_DEBUG)
4038 #if defined (PETSC_USE_CTABLE)
4039             { PetscInt data;
4040               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4041               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4042             }
4043 #else
4044             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4045 #endif
4046 #endif
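            /* Editor's note: colmap maps a global block column to 1 + bs*(its local
               block column in B), as built by CreateColmap_MPIBAIJ_Private(); the
               lookup below undoes that encoding, and DisAssemble_MPIBAIJ() runs when
               the column is absent and new nonzeros are allowed. */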
4047 #if defined (PETSC_USE_CTABLE)
4048             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4049             col  = (col - 1)/bs;
4050 #else
4051             col = (baij->colmap[in[j]] - 1)/bs;
4052 #endif
4053             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4054               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4055               col  = in[j];
4056             }
4057           }
4058           else col = in[j];
4059           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4060         }
4061       }
4062     } else {
4063       if (!baij->donotstash) {
4064         if (roworiented) {
4065           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4066         } else {
4067           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4068         }
4069       }
4070     }
4071   }
4072 
4073   /* task normally handled by MatSetValuesBlocked() */
4074   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
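
/*
   Editor's sketch (not part of the original source): the routine above is the
   Fortran entry point; from C the same operation is a plain MatSetValuesBlocked()
   call.  Indices and values here are illustrative only:

     PetscInt    row = 0, col = 0;                 block row/column indices
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};      one bs x bs block with bs = 2
     ierr = MatSetValuesBlocked(A,1,&row,1,&col,vals,ADD_VALUES);CHKERRQ(ierr);
*/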
4077 
4078 #undef __FUNCT__
4079 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4080 /*@
4081      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the
4082          local rows in standard CSR format.
4083 
4084    Collective on MPI_Comm
4085 
4086    Input Parameters:
4087 +  comm - MPI communicator
4088 .  bs - the block size, only a block size of 1 is supported
4089 .  m - number of local rows (cannot be PETSC_DECIDE)
4090 .  n - number of local columns; this should be the same as the local size used in creating
4091        the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4092        calculated if N is given); for square matrices n is almost always m
4093 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4094 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4095 .   i - row offsets; i[r] gives the location in j and a of the first entry of row r, with i[0] = 0
4096 .   j - column indices
4097 -   a - matrix values
4098 
4099    Output Parameter:
4100 .   mat - the matrix
4101 
4102    Level: intermediate
4103 
4104    Notes:
4105        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4106      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4107      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4108 
4109        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4110 
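   Example (editorial illustration, not from the original manual page): with
   bs = 1 and a single process, the local 2 x 3 matrix

      ( 1  0  2 )
      ( 0  3  0 )

   is described by i = {0,2,3}, j = {0,2,1}, a = {1,2,3}.
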
4111 .keywords: matrix, baij, compressed row, sparse, parallel
4112 
4113 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValuesBlocked(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4114           MATMPIBAIJ, MatCreateMPIBAIJ(), MatCreateMPIAIJWithSplitArrays()
4115 @*/
4116 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4117 {
4118   PetscErrorCode ierr;
4119 
4120 
4121   PetscFunctionBegin;
4122   if (i[0]) {
4123     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4124   }
4125   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4126   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4127   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4128   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4129   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4130   PetscFunctionReturn(0);
4131 }
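
/*
   Editor's usage sketch (illustrative, not in the original source): build a
   3 x 3 diagonal matrix with bs = 1 on one process, following the 0-based CSR
   convention described in the manual page above.

     PetscInt    ii[4] = {0,1,2,3};       row offsets, one per row plus a final end marker
     PetscInt    jj[3] = {0,1,2};         column index of each entry
     PetscScalar aa[3] = {4.0,5.0,6.0};   entry values
     Mat         A;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
*/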
4132