1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
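  /* Compare the diagonal-block maxima with the off-diagonal maxima.  Note that idxb[]
     holds local column indices of B; below they are translated back to global columns via
     garray[] (which maps local block columns of B to global block columns) plus the
     offset of the column within its block. */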
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local column number in the off-diagonal part (B) of the local
80    storage of the matrix.  This is done in a non-scalable way, since the length
81    of colmap equals the global number of block columns; see the lookup sketch below.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
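
/*
   Illustrative sketch (not part of the library logic) of how the colmap built above is
   consulted elsewhere in this file to translate a global column index gcol into a local
   column of the off-diagonal block B; gcol, lcol and bs are hypothetical names here:

       PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
       lcol--;                               -- table stores local column + 1; 0 (absent) becomes -1
   #else
       lcol = baij->colmap[gcol/bs] - 1;     -- array stores local column + 1; 0 (absent) becomes -1
   #endif
       if (lcol >= 0) lcol += gcol % bs;     -- add the offset of gcol within its block

   A negative lcol means the block column is not yet present in B (see MatSetValues_MPIBAIJ()).
*/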
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
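
/*
   Both macros above implement the same insertion scheme: each block row is kept sorted by
   block column, so the search first narrows the range with a few bisection steps and then
   scans linearly; if the block column is not found and new nonzeros are allowed, the later
   entries of the row are shifted up by one block (bs2 values each) to make room.  A rough
   sketch of the search portion, with rp[] the block-column indices of the current row:

       low = 0; high = nrow;
       while (high - low > 3) {              -- bisect until the window is small
         t = (low + high)/2;
         if (rp[t] > bcol) high = t; else low = t;
       }
       for (_i = low; _i < high; _i++) {     -- finish with a linear scan
         if (rp[_i] == bcol) ...             -- found: update the bs x bs block in place
       }
*/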
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
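            /* col < 0 means this global block column is not yet represented in B; if new
               nonzeros are allowed, convert B back to global column numbering (disassemble)
               so that the entry can be inserted */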
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         if (roworiented) {
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
263         } else {
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
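
/*
   Typical usage sketch (from user code, not part of this file): entries for rows owned by
   other processes are stashed by the routine above and communicated during assembly, so a
   call sequence generally looks like

       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
       ...                                   -- more MatSetValues() calls, possibly off-process
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where row, col and val are illustrative variables declared by the caller.
*/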
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
275 {
276   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
277   const PetscScalar *value;
278   MatScalar         *barray=baij->barray;
279   PetscBool         roworiented = baij->roworiented;
280   PetscErrorCode    ierr;
281   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
282   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
283   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
284 
285   PetscFunctionBegin;
286   if (!barray) {
287     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
288     baij->barray = barray;
289   }
290 
291   if (roworiented) {
292     stepval = (n-1)*bs;
293   } else {
294     stepval = (m-1)*bs;
295   }
296   for (i=0; i<m; i++) {
297     if (im[i] < 0) continue;
298 #if defined(PETSC_USE_DEBUG)
299     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
300 #endif
301     if (im[i] >= rstart && im[i] < rend) {
302       row = im[i] - rstart;
303       for (j=0; j<n; j++) {
304         /* If only one block column (row-oriented) or one block row (column-oriented) is being set, the block is already contiguous and a copy is not required */
305         if ((roworiented) && (n == 1)) {
306           barray = (MatScalar*)v + i*bs2;
307         } else if((!roworiented) && (m == 1)) {
308           barray = (MatScalar*)v + j*bs2;
309         } else { /* Here a copy is required */
310           if (roworiented) {
311             value = v + (i*(stepval+bs) + j)*bs;
312           } else {
313 	    value = v + (j*(stepval+bs) + i)*bs;
314           }
315           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
316             for (jj=0; jj<bs; jj++) {
317               barray[jj]  = value[jj];
318             }
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend){
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         }
328         else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined (PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined (PETSC_USE_CTABLE)
349 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
355               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col =  in[j];
357             }
358           }
359           else col = in[j];
360           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
361         }
362       }
363     } else {
364       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
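
/*
   Usage sketch (illustrative only): MatSetValuesBlocked() takes block row/column indices and
   a dense bs*bs array per block.  For bs = 2, inserting the single block at block row 0,
   block column 1 (i.e. point rows 0..1, point columns 2..3) might look like

       PetscScalar vals[4] = {1.0, 2.0, 3.0, 4.0};     -- one 2x2 block, row-oriented by default
       PetscInt    brow = 0, bcol = 1;
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);

   The names vals, brow and bcol are hypothetical; mat is any MPIBAIJ matrix with block size 2.
*/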
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
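/*
   HASH() is multiplicative hashing: the key is multiplied by the fractional golden ratio,
   the integer part is discarded, and the fractional part is scaled to the table size.
   A worked example (values rounded): for key = 7 and size = 100,
       tmp  = 7*0.6180339887 = 4.3262...
       HASH = (PetscInt)(100*(4.3262... - 4)) = 32
   so the entry for key 7 is first probed at slot 32.
*/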
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscBool      roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399   if (v) PetscValidScalarPointer(v,6);
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405     row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up the entry in the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up the entry in the hash table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504         if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
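  /* The norms computed below are, with a_ij the (point) entries of the global matrix:
        NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )   (local sums of A and B, then MPI_SUM)
        NORM_1:          max_j sum_i |a_ij|        (column sums reduced with MPI_SUM)
        NORM_INFINITY:   max_i sum_j |a_ij|        (row sums reduced with MPI_MAX)   */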
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = PetscSqrtReal(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
662       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
663       v = amat->a; jj = amat->j;
664       for (i=0; i<amat->nz; i++) {
665         for (j=0; j<bs; j++){
666           col = bs*(cstart + *jj) + j; /* column index */
667           for (row=0; row<bs; row++){
668             tmp[col] += PetscAbsScalar(*v);  v++;
669           }
670         }
671         jj++;
672       }
673       v = bmat->a; jj = bmat->j;
674       for (i=0; i<bmat->nz; i++) {
675         for (j=0; j<bs; j++){
676           col = bs*garray[*jj] + j;
677           for (row=0; row<bs; row++){
678             tmp[col] += PetscAbsScalar(*v); v++;
679           }
680         }
681         jj++;
682       }
683       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
684       *nrm = 0.0;
685       for (j=0; j<mat->cmap->N; j++) {
686         if (tmp2[j] > *nrm) *nrm = tmp2[j];
687       }
688       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY) { /* max row sum */
690       PetscReal *sums;
691       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
692       sum = 0.0;
693       for (j=0; j<amat->mbs; j++) {
694         for (row=0; row<bs; row++) sums[row] = 0.0;
695         v = amat->a + bs2*amat->i[j];
696         nz = amat->i[j+1]-amat->i[j];
697         for (i=0; i<nz; i++) {
698           for (col=0; col<bs; col++){
699             for (row=0; row<bs; row++){
700               sums[row] += PetscAbsScalar(*v); v++;
701             }
702           }
703         }
704         v = bmat->a + bs2*bmat->i[j];
705         nz = bmat->i[j+1]-bmat->i[j];
706         for (i=0; i<nz; i++) {
707           for (col=0; col<bs; col++){
708             for (row=0; row<bs; row++){
709               sums[row] += PetscAbsScalar(*v); v++;
710             }
711           }
712         }
713         for (row=0; row<bs; row++){
714           if (sums[row] > sum) sum = sums[row];
715         }
716       }
717       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
718       ierr = PetscFree(sums);CHKERRQ(ierr);
719     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 /*
725   Creates the hash table and fills it with the addresses of the existing nonzero blocks.
726   This table is created only once.
727   If new entries need to be added to the matrix,
728   then the hash table has to be destroyed and
729   recreated.
730 */
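/*
   The table uses open addressing with linear probing: each nonzero block (row,col) (in block
   indices) gets the key row*Nbs + col + 1, HASH() gives the first slot to try, and on a
   collision the scan moves to the next slot modulo the table size until an empty one is
   found.  Lookup in MatSetValues_MPIBAIJ_HT() repeats the same probe sequence, so the table
   must be rebuilt whenever the nonzero pattern changes.
*/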
731 #undef __FUNCT__
732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
734 {
735   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
736   Mat            A = baij->A,B=baij->B;
737   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
738   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
739   PetscErrorCode ierr;
740   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
741   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
742   PetscInt       *HT,key;
743   MatScalar      **HD;
744   PetscReal      tmp;
745 #if defined(PETSC_USE_INFO)
746   PetscInt       ct=0,max=0;
747 #endif
748 
749   PetscFunctionBegin;
750   if (baij->ht) PetscFunctionReturn(0);
751 
752   baij->ht_size = (PetscInt)(factor*nz);
753   ht_size       = baij->ht_size;
754 
755   /* Allocate Memory for Hash Table */
756   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
757   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
759   HD   = baij->hd;
760   HT   = baij->ht;
761 
762   /* Loop Over A */
763   for (i=0; i<a->mbs; i++) {
764     for (j=ai[i]; j<ai[i+1]; j++) {
765       row = i+rstart;
766       col = aj[j]+cstart;
767 
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++){
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = a->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786   /* Loop Over B */
787   for (i=0; i<b->mbs; i++) {
788     for (j=bi[i]; j<bi[i+1]; j++) {
789       row = i+rstart;
790       col = garray[bj[j]];
791       key = row*Nbs + col + 1;
792       h1  = HASH(ht_size,key,tmp);
793       for (k=0; k<ht_size; k++){
794         if (!HT[(h1+k)%ht_size]) {
795           HT[(h1+k)%ht_size] = key;
796           HD[(h1+k)%ht_size] = b->a + j*bs2;
797           break;
798 #if defined(PETSC_USE_INFO)
799         } else {
800           ct++;
801 #endif
802         }
803       }
804 #if defined(PETSC_USE_INFO)
805       if (k> max) max = k;
806 #endif
807     }
808   }
809 
810   /* Print Summary */
811 #if defined(PETSC_USE_INFO)
812   for (i=0,j=0; i<ht_size; i++) {
813     if (HT[i]) {j++;}
814   }
815   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
816 #endif
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
823 {
824   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
825   PetscErrorCode ierr;
826   PetscInt       nstash,reallocs;
827   InsertMode     addv;
828 
829   PetscFunctionBegin;
830   if (baij->donotstash || mat->nooffprocentries) {
831     PetscFunctionReturn(0);
832   }
833 
834   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
835   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
836   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837   mat->insertmode = addv; /* in case this processor had no cache */
838 
839   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
840   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
841   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
842   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
851 {
852   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
853   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
854   PetscErrorCode ierr;
855   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856   PetscInt       *row,*col;
857   PetscBool      r1,r2,r3,other_disassembled;
858   MatScalar      *val;
859   InsertMode     addv = mat->insertmode;
860   PetscMPIInt    n;
861 
862   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
863   PetscFunctionBegin;
864   if (!baij->donotstash && !mat->nooffprocentries) {
865     while (1) {
866       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
867       if (!flg) break;
868 
869       for (i=0; i<n;) {
870         /* Now identify the consecutive vals belonging to the same row */
871         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
872         if (j < n) ncols = j-i;
873         else       ncols = n-i;
874         /* Now assemble all these values with a single function call */
875         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
876         i = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
880     /* Now process the block-stash. Since the values are stashed column-oriented,
881        temporarily clear the row-oriented flags, and restore the original flags
882        after the stashed values have been set */
883     r1 = baij->roworiented;
884     r2 = a->roworiented;
885     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
886     baij->roworiented = PETSC_FALSE;
887     a->roworiented    = PETSC_FALSE;
888     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
889     while (1) {
890       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
891       if (!flg) break;
892 
893       for (i=0; i<n;) {
894         /* Now identify the consecutive vals belonging to the same row */
895         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
896         if (j < n) ncols = j-i;
897         else       ncols = n-i;
898         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
899         i = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
903     baij->roworiented = r1;
904     a->roworiented    = r2;
905     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
906   }
907 
908   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
910 
911   /* determine if any processor has disassembled; if so we must
912      also disassemble ourselves, in order that we may reassemble. */
913   /*
914      if the nonzero structure of the submatrix B cannot change, then no
915      processor disassembled, so we can skip this step
916   */
917   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
918     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
919     if (mat->was_assembled && !other_disassembled) {
920       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
921     }
922   }
923 
924   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
925     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
926   }
927   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
930 
931 #if defined(PETSC_USE_INFO)
932   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
933     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
934     baij->ht_total_ct  = 0;
935     baij->ht_insert_ct = 0;
936   }
937 #endif
938   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
940     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
941     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
942   }
943 
944   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
945   baij->rowvalues = 0;
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952 {
953   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       size = baij->size,rank = baij->rank;
956   PetscInt          bs = mat->rmap->bs;
957   PetscBool         iascii,isdraw;
958   PetscViewer       sviewer;
959   PetscViewerFormat format;
960 
961   PetscFunctionBegin;
962   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
964   if (iascii) {
965     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
966     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967       MatInfo info;
968       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
969       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
973       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1018 
1019     /* copy over the A part */
1020     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1021     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1022     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1023 
1024     for (i=0; i<mbs; i++) {
1025       rvals[0] = bs*(baij->rstartbs + i);
1026       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1027       for (j=ai[i]; j<ai[i+1]; j++) {
1028         col = (baij->cstartbs+aj[j])*bs;
1029         for (k=0; k<bs; k++) {
1030           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1031           col++; a += bs;
1032         }
1033       }
1034     }
1035     /* copy over the B part */
1036     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1037     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1038     for (i=0; i<mbs; i++) {
1039       rvals[0] = bs*(baij->rstartbs + i);
1040       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1041       for (j=ai[i]; j<ai[i+1]; j++) {
1042         col = baij->garray[aj[j]]*bs;
1043         for (k=0; k<bs; k++) {
1044           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1045           col++; a += bs;
1046         }
1047       }
1048     }
1049     ierr = PetscFree(rvals);CHKERRQ(ierr);
1050     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1051     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     /*
1053        Everyone has to call to draw the matrix since the graphics waits are
1054        synchronized across all processors that share the PetscDraw object
1055     */
1056     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1057     if (!rank) {
1058       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1059     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1060       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1061       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1062     }
1063     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1064     ierr = MatDestroy(&A);CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1072 {
1073   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1074   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1075   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1076   PetscErrorCode ierr;
1077   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1078   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1079   int            fd;
1080   PetscScalar    *column_values;
1081   FILE           *file;
1082   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1083   PetscInt       message_count,flowcontrolcount;
1084 
1085   PetscFunctionBegin;
1086   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1087   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1088   nz   = bs2*(A->nz + B->nz);
1089   rlen = mat->rmap->n;
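  /* PETSc binary matrix format written below: a four-entry header (MAT_FILE_CLASSID, M, N,
     global number of nonzeros), followed by the row lengths, then the column indices, then
     the numerical values, each section gathered to process 0 under flow control */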
1090   if (!rank) {
1091     header[0] = MAT_FILE_CLASSID;
1092     header[1] = mat->rmap->N;
1093     header[2] = mat->cmap->N;
1094     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1095     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1096     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1097     /* get largest number of rows any processor has */
1098     range = mat->rmap->range;
1099     for (i=1; i<size; i++) {
1100       rlen = PetscMax(rlen,range[i+1] - range[i]);
1101     }
1102   } else {
1103     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1104   }
1105 
1106   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1107   /* compute the number of nonzero blocks in each block row */
1108   for (i=0; i<a->mbs; i++) {
1109     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1110   }
1111   /* store the row lengths to the file */
1112   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1113   if (!rank) {
1114     MPI_Status status;
1115     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1116     rlen  = (range[1] - range[0])/bs;
1117     for (i=0; i<rlen; i++) {
1118       for (j=0; j<bs; j++) {
1119         row_lens[i*bs+j] = bs*crow_lens[i];
1120       }
1121     }
1122     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1123     for (i=1; i<size; i++) {
1124       rlen = (range[i+1] - range[i])/bs;
1125       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1126       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1127       for (k=0; k<rlen; k++) {
1128 	for (j=0; j<bs; j++) {
1129 	  row_lens[k*bs+j] = bs*crow_lens[k];
1130 	}
1131       }
1132       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     }
1134     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1135     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1136   } else {
1137     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1138     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1139     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1140   }
1141   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1142 
1143   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
1144      information needed to reconstruct them for each row from a block row. This does require more communication, but still no more than
1145      the communication needed for the nonzero values */
1146   nzmax = nz; /* space the largest processor needs */
1147   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1148   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1149   cnt  = 0;
1150   for (i=0; i<a->mbs; i++) {
1151     pcnt = cnt;
1152     for (j=B->i[i]; j<B->i[i+1]; j++) {
1153       if ( (col = garray[B->j[j]]) > cstart) break;
1154       for (l=0; l<bs; l++) {
1155 	column_indices[cnt++] = bs*col+l;
1156       }
1157     }
1158     for (k=A->i[i]; k<A->i[i+1]; k++) {
1159       for (l=0; l<bs; l++) {
1160         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1161       }
1162     }
1163     for (; j<B->i[i+1]; j++) {
1164       for (l=0; l<bs; l++) {
1165         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1166       }
1167     }
1168     len = cnt - pcnt;
1169     for (k=1; k<bs; k++) {
1170       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1171       cnt += len;
1172     }
1173   }
1174   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1175 
1176   /* store the columns to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     MPI_Status status;
1180     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     for (i=1; i<size; i++) {
1182       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1183       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1184       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     }
1187     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1188   } else {
1189     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1190     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1191     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1195 
1196   /* load up the numerical values */
1197   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1198   cnt = 0;
1199   for (i=0; i<a->mbs; i++) {
1200     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1201     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202       if ( garray[B->j[j]] > cstart) break;
1203       for (l=0; l<bs; l++) {
1204         for (ll=0; ll<bs; ll++) {
1205 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1206         }
1207       }
1208       cnt += bs;
1209     }
1210     for (k=A->i[i]; k<A->i[i+1]; k++) {
1211       for (l=0; l<bs; l++) {
1212         for (ll=0; ll<bs; ll++) {
1213           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1214         }
1215       }
1216       cnt += bs;
1217     }
1218     for (; j<B->i[i+1]; j++) {
1219       for (l=0; l<bs; l++) {
1220         for (ll=0; ll<bs; ll++) {
1221 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1222         }
1223       }
1224       cnt += bs;
1225     }
1226     cnt += (bs-1)*rlen;
1227   }
1228   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1229 
1230   /* store the column values to the file */
1231   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1232   if (!rank) {
1233     MPI_Status status;
1234     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1235     for (i=1; i<size; i++) {
1236       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1237       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1238       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1240     }
1241     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1242   } else {
1243     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1244     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1245     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(column_values);CHKERRQ(ierr);
1249 
1250   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1251   if (file) {
1252     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatView_MPIBAIJ"
1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1260 {
1261   PetscErrorCode ierr;
1262   PetscBool      iascii,isdraw,issocket,isbinary;
1263 
1264   PetscFunctionBegin;
1265   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1269   if (iascii || isdraw || issocket) {
1270     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1271   } else if (isbinary) {
1272     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1273   } else {
1274     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1282 {
1283   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287 #if defined(PETSC_USE_LOG)
1288   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1289 #endif
1290   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1291   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1292   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1293   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1294 #if defined (PETSC_USE_CTABLE)
1295   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1296 #else
1297   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1300   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1301   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1302   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1303   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1304   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1306   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1307 
1308   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatMult_MPIBAIJ"
1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1324 {
1325   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327   PetscInt       nt;
1328 
1329   PetscFunctionBegin;
1330   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1331   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1332   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1333   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1334   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1336   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
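
/*
   Illustrative sketch (an assumption, not part of this file): MatMult above computes
   y = A_diag*x_local + B_offdiag*x_ghost, overlapping the ghost-value scatter with the
   local product.  A caller only sees the usual MatMult(); K, X, Y below are hypothetical.

       PetscErrorCode ierr;
       Vec            X,Y;
       ierr = MatGetVecs(K,&X,&Y);CHKERRQ(ierr);   K is an assembled MPIBAIJ matrix (assumed)
       ierr = VecSet(X,1.0);CHKERRQ(ierr);
       ierr = MatMult(K,X,Y);CHKERRQ(ierr);        internally: scatter begin, diagonal block, scatter end, off-diagonal add
       ierr = VecDestroy(&X);CHKERRQ(ierr);
       ierr = VecDestroy(&Y);CHKERRQ(ierr);
*/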
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1344 {
1345   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1351   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1359 {
1360   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1361   PetscErrorCode ierr;
1362   PetscBool      merged;
1363 
1364   PetscFunctionBegin;
1365   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1366   /* do nondiagonal part */
1367   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1368   if (!merged) {
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* do local part */
1372     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1373     /* receive remote parts: note this assumes the values are not actually */
1374     /* inserted in yy until the next line */
1375     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1376   } else {
1377     /* do local part */
1378     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1379     /* send it on its way */
1380     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381     /* values actually were received in the Begin() but we need to call this nop */
1382     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   }
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1390 {
1391   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   /* do nondiagonal part */
1396   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1397   /* send it on its way */
1398   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1399   /* do local part */
1400   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1401   /* receive remote parts: note this assumes the values are not actually */
1402   /* inserted in zz until the next line, which is true for this implementation */
1403   /* but may not hold for every VecScatter implementation. */
1404   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 /*
1409   This only works correctly for square matrices where the subblock A->A is the
1410    diagonal block
1411 */
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1421   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatScale_MPIBAIJ"
1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1428 {
1429   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1430   PetscErrorCode ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1434   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1441 {
1442   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1443   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1444   PetscErrorCode ierr;
1445   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1446   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1447   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1448 
1449   PetscFunctionBegin;
1450   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1451   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1452   mat->getrowactive = PETSC_TRUE;
1453 
1454   if (!mat->rowvalues && (idx || v)) {
1455     /*
1456         allocate enough space to hold information from the longest row.
1457     */
1458     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1459     PetscInt     max = 1,mbs = mat->mbs,tmp;
1460     for (i=0; i<mbs; i++) {
1461       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1462       if (max < tmp) { max = tmp; }
1463     }
1464     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1465   }
1466   lrow = row - brstart;
1467 
1468   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1469   if (!v)   {pvA = 0; pvB = 0;}
1470   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1471   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1472   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1473   nztot = nzA + nzB;
1474 
1475   cmap  = mat->garray;
1476   if (v  || idx) {
1477     if (nztot) {
1478       /* Sort by increasing column numbers, assuming A and B already sorted */
1479       PetscInt imark = -1;
1480       if (v) {
1481         *v = v_p = mat->rowvalues;
1482         for (i=0; i<nzB; i++) {
1483           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1484           else break;
1485         }
1486         imark = i;
1487         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1488         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1489       }
1490       if (idx) {
1491         *idx = idx_p = mat->rowindices;
1492         if (imark > -1) {
1493           for (i=0; i<imark; i++) {
1494             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495           }
1496         } else {
1497           for (i=0; i<nzB; i++) {
1498             if (cmap[cworkB[i]/bs] < cstart)
1499               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1500             else break;
1501           }
1502           imark = i;
1503         }
1504         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1505         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1506       }
1507     } else {
1508       if (idx) *idx = 0;
1509       if (v)   *v   = 0;
1510     }
1511   }
1512   *nz = nztot;
1513   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1514   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1523 
1524   PetscFunctionBegin;
1525   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1526   baij->getrowactive = PETSC_FALSE;
1527   PetscFunctionReturn(0);
1528 }
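
/*
   Usage sketch (assumed, for illustration only): MatGetRow() on an MPIBAIJ matrix is
   valid only for locally owned rows and must be paired with MatRestoreRow() before the
   next call; K below is a hypothetical assembled matrix and ierr a PetscErrorCode.

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(K,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(K,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ... use ncols, cols[], vals[] here ...
         ierr = MatRestoreRow(K,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/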
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1533 {
1534   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1539   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1546 {
1547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1548   Mat            A = a->A,B = a->B;
1549   PetscErrorCode ierr;
1550   PetscReal      isend[5],irecv[5];
1551 
1552   PetscFunctionBegin;
1553   info->block_size     = (PetscReal)matin->rmap->bs;
1554   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1555   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1556   isend[3] = info->memory;  isend[4] = info->mallocs;
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559   isend[3] += info->memory;  isend[4] += info->mallocs;
1560   if (flag == MAT_LOCAL) {
1561     info->nz_used      = isend[0];
1562     info->nz_allocated = isend[1];
1563     info->nz_unneeded  = isend[2];
1564     info->memory       = isend[3];
1565     info->mallocs      = isend[4];
1566   } else if (flag == MAT_GLOBAL_MAX) {
1567     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1575     info->nz_used      = irecv[0];
1576     info->nz_allocated = irecv[1];
1577     info->nz_unneeded  = irecv[2];
1578     info->memory       = irecv[3];
1579     info->mallocs      = irecv[4];
1580   } else {
1581     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   }
1583   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1584   info->fill_ratio_needed = 0;
1585   info->factor_mallocs    = 0;
1586   PetscFunctionReturn(0);
1587 }
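
/*
   Usage sketch (an assumption, not from the manual pages): MatGetInfo() with
   MAT_GLOBAL_SUM reduces the five per-process quantities gathered above over the
   communicator; K is a hypothetical assembled matrix.

       MatInfo info;
       ierr = MatGetInfo(K,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/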
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1592 {
1593   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   switch (op) {
1598   case MAT_NEW_NONZERO_LOCATIONS:
1599   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1600   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1601   case MAT_KEEP_NONZERO_PATTERN:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_ROW_ORIENTED:
1607     a->roworiented = flg;
1608     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1609     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1610     break;
1611   case MAT_NEW_DIAGONALS:
1612     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1613     break;
1614   case MAT_IGNORE_OFF_PROC_ENTRIES:
1615     a->donotstash = flg;
1616     break;
1617   case MAT_USE_HASH_TABLE:
1618     a->ht_flag = flg;
1619     break;
1620   case MAT_SYMMETRIC:
1621   case MAT_STRUCTURALLY_SYMMETRIC:
1622   case MAT_HERMITIAN:
1623   case MAT_SYMMETRY_ETERNAL:
1624     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1625     break;
1626   default:
1627     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1635 {
1636   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1637   Mat_SeqBAIJ    *Aloc;
1638   Mat            B;
1639   PetscErrorCode ierr;
1640   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1641   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1642   MatScalar      *a;
1643 
1644   PetscFunctionBegin;
1645   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1648     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1649     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1650     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1651   } else {
1652     B = *matout;
1653   }
1654 
1655   /* copy over the A part */
1656   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1657   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1659 
1660   for (i=0; i<mbs; i++) {
1661     rvals[0] = bs*(baij->rstartbs + i);
1662     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1663     for (j=ai[i]; j<ai[i+1]; j++) {
1664       col = (baij->cstartbs+aj[j])*bs;
1665       for (k=0; k<bs; k++) {
1666         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1667         col++; a += bs;
1668       }
1669     }
1670   }
1671   /* copy over the B part */
1672   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1673   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1674   for (i=0; i<mbs; i++) {
1675     rvals[0] = bs*(baij->rstartbs + i);
1676     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1677     for (j=ai[i]; j<ai[i+1]; j++) {
1678       col = baij->garray[aj[j]]*bs;
1679       for (k=0; k<bs; k++) {
1680         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1681         col++; a += bs;
1682       }
1683     }
1684   }
1685   ierr = PetscFree(rvals);CHKERRQ(ierr);
1686   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688 
1689   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1690     *matout = B;
1691   } else {
1692     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1693   }
1694   PetscFunctionReturn(0);
1695 }
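
/*
   Usage sketch (illustrative assumption): the transpose above either builds a new
   matrix or reuses A itself, the latter only for square matrices.  K and Kt are hypothetical.

       Mat Kt;
       ierr = MatTranspose(K,MAT_INITIAL_MATRIX,&Kt);CHKERRQ(ierr);   new matrix holding K^T
       ierr = MatTranspose(K,MAT_REUSE_MATRIX,&K);CHKERRQ(ierr);      in-place, square K only
       ierr = MatDestroy(&Kt);CHKERRQ(ierr);
*/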
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1700 {
1701   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1702   Mat            a = baij->A,b = baij->B;
1703   PetscErrorCode ierr;
1704   PetscInt       s1,s2,s3;
1705 
1706   PetscFunctionBegin;
1707   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1708   if (rr) {
1709     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1710     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1711     /* Overlap communication with computation. */
1712     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1713   }
1714   if (ll) {
1715     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1716     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1717     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1718   }
1719   /* scale  the diagonal block */
1720   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1721 
1722   if (rr) {
1723     /* Do a scatter end and then right scale the off-diagonal block */
1724     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1726   }
1727 
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1734 {
1735   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1736   PetscErrorCode    ierr;
1737   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1738   PetscInt          i,*owners = A->rmap->range;
1739   PetscInt          *nprocs,j,idx,nsends,row;
1740   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1741   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1742   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1743   MPI_Comm          comm = ((PetscObject)A)->comm;
1744   MPI_Request       *send_waits,*recv_waits;
1745   MPI_Status        recv_status,*send_status;
1746   const PetscScalar *xx;
1747   PetscScalar       *bb;
1748 #if defined(PETSC_DEBUG)
1749   PetscBool         found = PETSC_FALSE;
1750 #endif
1751 
1752   PetscFunctionBegin;
1753   /*  first count number of contributors to each processor */
1754   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1755   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1756   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1757   j = 0;
1758   for (i=0; i<N; i++) {
1759     if (lastidx > (idx = rows[i])) j = 0;
1760     lastidx = idx;
1761     for (; j<size; j++) {
1762       if (idx >= owners[j] && idx < owners[j+1]) {
1763         nprocs[2*j]++;
1764         nprocs[2*j+1] = 1;
1765         owner[i] = j;
1766 #if defined(PETSC_DEBUG)
1767         found = PETSC_TRUE;
1768 #endif
1769         break;
1770       }
1771     }
1772 #if defined(PETSC_DEBUG)
1773     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1774     found = PETSC_FALSE;
1775 #endif
1776   }
1777   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1778 
1779   if (A->nooffproczerorows) {
1780     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1781     nrecvs = nsends;
1782     nmax   = N;
1783   } else {
1784     /* inform other processors of number of messages and max length*/
1785     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1786   }
1787 
1788   /* post receives:   */
1789   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1790   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1791   for (i=0; i<nrecvs; i++) {
1792     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1793   }
1794 
1795   /* do sends:
1796      1) starts[i] gives the starting index in svalues for stuff going to
1797      the ith processor
1798   */
1799   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1800   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1801   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1802   starts[0]  = 0;
1803   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1804   for (i=0; i<N; i++) {
1805     svalues[starts[owner[i]]++] = rows[i];
1806   }
1807 
1808   starts[0] = 0;
1809   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1810   count = 0;
1811   for (i=0; i<size; i++) {
1812     if (nprocs[2*i+1]) {
1813       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1814     }
1815   }
1816   ierr = PetscFree(starts);CHKERRQ(ierr);
1817 
1818   base = owners[rank];
1819 
1820   /*  wait on receives */
1821   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1822   count  = nrecvs;
1823   slen = 0;
1824   while (count) {
1825     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1826     /* unpack receives into our local space */
1827     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1828     source[imdex]  = recv_status.MPI_SOURCE;
1829     lens[imdex]    = n;
1830     slen          += n;
1831     count--;
1832   }
1833   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1834 
1835   /* move the data into the send scatter */
1836   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1837   count = 0;
1838   for (i=0; i<nrecvs; i++) {
1839     values = rvalues + i*nmax;
1840     for (j=0; j<lens[i]; j++) {
1841       lrows[count++] = values[j] - base;
1842     }
1843   }
1844   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1845   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1846   ierr = PetscFree(owner);CHKERRQ(ierr);
1847   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1848 
1849   /* fix right hand side if needed */
1850   if (x && b) {
1851     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1852     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1853     for (i=0; i<slen; i++) {
1854       bb[lrows[i]] = diag*xx[lrows[i]];
1855     }
1856     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1857     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1858   }
1859 
1860   /* actually zap the local rows */
1861   /*
1862         Zero the required rows. If the "diagonal block" of the matrix
1863      is square and the user wishes to set the diagonal we use separate
1864      code so that MatSetValues() is not called for each diagonal allocating
1865      new memory, thus calling lots of mallocs and slowing things down.
1866 
1867   */
1868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1869   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1871     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1872   } else if (diag != 0.0) {
1873     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1874     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1875        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1876     for (i=0; i<slen; i++) {
1877       row  = lrows[i] + rstart_bs;
1878       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1879     }
1880     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1882   } else {
1883     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1884   }
1885 
1886   ierr = PetscFree(lrows);CHKERRQ(ierr);
1887 
1888   /* wait on sends */
1889   if (nsends) {
1890     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1891     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1892     ierr = PetscFree(send_status);CHKERRQ(ierr);
1893   }
1894   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1895   ierr = PetscFree(svalues);CHKERRQ(ierr);
1896 
1897   PetscFunctionReturn(0);
1898 }
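
/*
   Usage sketch (assumed example): rows may be listed on any process and are routed to
   their owners by the communication above.  Passing the optional vectors x and b also
   fixes the right hand side so that b[row] = diag*x[row].  K, X, B are hypothetical.

       PetscInt rows[] = {0,5,17};                          global (point) row numbers
       ierr = MatZeroRows(K,3,rows,2.0,X,B);CHKERRQ(ierr);  zero the rows, put 2.0 on the diagonal
*/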
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1903 {
1904   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatEqual_MPIBAIJ"
1916 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1917 {
1918   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1919   Mat            a,b,c,d;
1920   PetscBool      flg;
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   a = matA->A; b = matA->B;
1925   c = matB->A; d = matB->B;
1926 
1927   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1928   if (flg) {
1929     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1930   }
1931   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatCopy_MPIBAIJ"
1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1938 {
1939   PetscErrorCode ierr;
1940   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1941   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1942 
1943   PetscFunctionBegin;
1944   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1945   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1946     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1947   } else {
1948     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1949     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1950   }
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1957 {
1958   PetscErrorCode ierr;
1959 
1960   PetscFunctionBegin;
1961   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1968 {
1969   PetscErrorCode ierr;
1970   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1971   PetscBLASInt   bnz,one=1;
1972   Mat_SeqBAIJ    *x,*y;
1973 
1974   PetscFunctionBegin;
1975   if (str == SAME_NONZERO_PATTERN) {
1976     PetscScalar alpha = a;
1977     x = (Mat_SeqBAIJ *)xx->A->data;
1978     y = (Mat_SeqBAIJ *)yy->A->data;
1979     bnz = PetscBLASIntCast(x->nz);
1980     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1981     x = (Mat_SeqBAIJ *)xx->B->data;
1982     y = (Mat_SeqBAIJ *)yy->B->data;
1983     bnz = PetscBLASIntCast(x->nz);
1984     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1985   } else {
1986     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1987   }
1988   PetscFunctionReturn(0);
1989 }
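
/*
   Usage sketch (illustrative assumption): when Y and X share the same nonzero pattern
   the update above reduces to two BLAS axpy calls on the stored blocks; otherwise
   MatAXPY_Basic() is used.  Y and X are hypothetical assembled MPIBAIJ matrices.

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       Y = Y + 2*X, fast path
       ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  general, slower path
*/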
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1994 {
1995   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1996   PetscInt rbs,cbs;
1997   PetscErrorCode ierr;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2001   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2002   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2003   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2004   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with BAIJ row block size %D",bs,rbs);
2005   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with BAIJ column block size %D",bs,cbs);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2012 {
2013   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2014   PetscErrorCode ierr;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2018   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2025 {
2026   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2031   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2038 {
2039   PetscErrorCode ierr;
2040   IS             iscol_local;
2041   PetscInt       csize;
2042 
2043   PetscFunctionBegin;
2044   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2045   if (call == MAT_REUSE_MATRIX) {
2046     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2047     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2048   } else {
2049     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2050   }
2051   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2052   if (call == MAT_INITIAL_MATRIX) {
2053     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2054     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2055   }
2056   PetscFunctionReturn(0);
2057 }
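
/*
   Usage sketch (an assumption for illustration): the first call gathers iscol onto every
   process and caches it on the submatrix under "ISAllGather", so a later call with
   MAT_REUSE_MATRIX can refill the same matrix.  K, isrow, iscol, sub are hypothetical.

       Mat sub;
       ierr = MatGetSubMatrix(K,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... change numerical values in K, same nonzero structure ...
       ierr = MatGetSubMatrix(K,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
       ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/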
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2061 /*
2062     Not great since it makes two copies of the submatrix: first a sequential SeqBAIJ
2063   on each process, then the final result obtained by concatenating the local matrices.
2064   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2065 */
2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2067 {
2068   PetscErrorCode ierr;
2069   PetscMPIInt    rank,size;
2070   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2071   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2072   Mat            *local,M,Mreuse;
2073   MatScalar      *vwork,*aa;
2074   MPI_Comm       comm = ((PetscObject)mat)->comm;
2075   Mat_SeqBAIJ    *aij;
2076 
2077 
2078   PetscFunctionBegin;
2079   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081 
2082   if (call ==  MAT_REUSE_MATRIX) {
2083     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2084     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2085     local = &Mreuse;
2086     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2087   } else {
2088     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2089     Mreuse = *local;
2090     ierr   = PetscFree(local);CHKERRQ(ierr);
2091   }
2092 
2093   /*
2094       m - number of local rows
2095       n - number of columns (same on all processors)
2096       rstart - first row in new global matrix generated
2097   */
2098   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2099   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2100   m    = m/bs;
2101   n    = n/bs;
2102 
2103   if (call == MAT_INITIAL_MATRIX) {
2104     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2105     ii  = aij->i;
2106     jj  = aij->j;
2107 
2108     /*
2109         Determine the number of non-zeros in the diagonal and off-diagonal
2110         portions of the matrix in order to do correct preallocation
2111     */
2112 
2113     /* first get start and end of "diagonal" columns */
2114     if (csize == PETSC_DECIDE) {
2115       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2116       if (mglobal == n*bs) { /* square matrix */
2117 	nlocal = m;
2118       } else {
2119         nlocal = n/size + ((n % size) > rank);
2120       }
2121     } else {
2122       nlocal = csize/bs;
2123     }
2124     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2125     rstart = rend - nlocal;
2126     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2127 
2128     /* next, compute all the lengths */
2129     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2130     olens = dlens + m;
2131     for (i=0; i<m; i++) {
2132       jend = ii[i+1] - ii[i];
2133       olen = 0;
2134       dlen = 0;
2135       for (j=0; j<jend; j++) {
2136         if (*jj < rstart || *jj >= rend) olen++;
2137         else dlen++;
2138         jj++;
2139       }
2140       olens[i] = olen;
2141       dlens[i] = dlen;
2142     }
2143     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2144     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2145     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2146     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2147     ierr = PetscFree(dlens);CHKERRQ(ierr);
2148   } else {
2149     PetscInt ml,nl;
2150 
2151     M = *newmat;
2152     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2153     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2154     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2155     /*
2156          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2157        rather than the slower MatSetValues().
2158     */
2159     M->was_assembled = PETSC_TRUE;
2160     M->assembled     = PETSC_FALSE;
2161   }
2162   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2163   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2164   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2165   ii  = aij->i;
2166   jj  = aij->j;
2167   aa  = aij->a;
2168   for (i=0; i<m; i++) {
2169     row   = rstart/bs + i;
2170     nz    = ii[i+1] - ii[i];
2171     cwork = jj;     jj += nz;
2172     vwork = aa;     aa += nz;
2173     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2174   }
2175 
2176   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   *newmat = M;
2179 
2180   /* save submatrix used in processor for next request */
2181   if (call ==  MAT_INITIAL_MATRIX) {
2182     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2183     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2184   }
2185 
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatPermute_MPIBAIJ"
2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2192 {
2193   MPI_Comm       comm,pcomm;
2194   PetscInt       first,local_size,nrows;
2195   const PetscInt *rows;
2196   PetscMPIInt    size;
2197   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2198   PetscErrorCode ierr;
2199 
2200   PetscFunctionBegin;
2201   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2202   /* make a collective version of 'rowp' */
2203   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2204   if (pcomm==comm) {
2205     crowp = rowp;
2206   } else {
2207     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2208     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2209     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2210     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2211   }
2212   /* collect the global row permutation and invert it */
2213   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2214   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2215   if (pcomm!=comm) {
2216     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2217   }
2218   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2219   /* get the local target indices */
2220   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2221   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2223   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2224   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2225   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2226   /* the column permutation is so much easier;
2227      make a local version of 'colp' and invert it */
2228   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2229   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2230   if (size==1) {
2231     lcolp = colp;
2232   } else {
2233     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2234     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2235     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2236   }
2237   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2238   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2239   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2240   if (size>1) {
2241     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2242     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2243   }
2244   /* now we just get the submatrix */
2245   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2246   /* clean up */
2247   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2248   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2254 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2255 {
2256   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2257   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2258 
2259   PetscFunctionBegin;
2260   if (nghosts) { *nghosts = B->nbs;}
2261   if (ghosts) {*ghosts = baij->garray;}
2262   PetscFunctionReturn(0);
2263 }
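
/*
   Usage sketch (assumed): the ghost list returned above is the block column map garray
   of the off-diagonal part, so the indices are global block indices.  K is hypothetical.

       PetscInt       nghost;
       const PetscInt *ghost;
       ierr = MatGetGhosts(K,&nghost,&ghost);CHKERRQ(ierr);   ghost[] lists the nghost nonlocal block columns referenced locally
*/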
2264 
2265 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2269 /*
2270     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2271 */
2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2273 {
2274   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2275   PetscErrorCode        ierr;
2276   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2277   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2278   const PetscInt        *is;
2279   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2280   PetscInt              *rowhit,M,cstart,cend,colb;
2281   PetscInt              *columnsforrow,l;
2282   IS                    *isa;
2283   PetscBool              done,flg;
2284   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2285   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2286 
2287   PetscFunctionBegin;
2288   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2289   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2290 
2291   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2292   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2293   M                = mat->rmap->n/bs;
2294   cstart           = mat->cmap->rstart/bs;
2295   cend             = mat->cmap->rend/bs;
2296   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2297   c->N             = mat->cmap->N/bs;
2298   c->m             = mat->rmap->n/bs;
2299   c->rstart        = mat->rmap->rstart/bs;
2300 
2301   c->ncolors       = nis;
2302   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2307   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2308 
2309   /* Allow access to data structures of local part of matrix */
2310   if (!baij->colmap) {
2311     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2312   }
2313   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2314   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2315 
2316   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2317   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2318 
2319   for (i=0; i<nis; i++) {
2320     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2321     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2322     c->ncolumns[i] = n;
2323     if (n) {
2324       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2325       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2326       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2327     } else {
2328       c->columns[i]  = 0;
2329     }
2330 
2331     if (ctype == IS_COLORING_GLOBAL){
2332       /* Determine the total (parallel) number of columns of this color */
2333       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2334       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2335 
2336       nn   = PetscMPIIntCast(n);
2337       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2338       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2339       if (!nctot) {
2340         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2341       }
2342 
2343       disp[0] = 0;
2344       for (j=1; j<size; j++) {
2345         disp[j] = disp[j-1] + ncolsonproc[j-1];
2346       }
2347 
2348       /* Get complete list of columns for color on each processor */
2349       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2350       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2351       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2352     } else if (ctype == IS_COLORING_GHOSTED){
2353       /* Determine local number of columns of this color on this process, including ghost points */
2354       nctot = n;
2355       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2356       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2357     } else {
2358       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2359     }
2360 
2361     /*
2362        Mark all rows affected by these columns
2363     */
2364     /* Temporary option to allow for debugging/testing */
2365     flg  = PETSC_FALSE;
2366     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2367     if (!flg) {/*-----------------------------------------------------------------------------*/
2368       /* crude, fast version */
2369       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2370       /* loop over columns*/
2371       for (j=0; j<nctot; j++) {
2372         if (ctype == IS_COLORING_GHOSTED) {
2373           col = ltog[cols[j]];
2374         } else {
2375           col  = cols[j];
2376         }
2377         if (col >= cstart && col < cend) {
2378           /* column is in diagonal block of matrix */
2379           rows = A_cj + A_ci[col-cstart];
2380           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2381         } else {
2382 #if defined (PETSC_USE_CTABLE)
2383           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2384 	  colb --;
2385 #else
2386           colb = baij->colmap[col] - 1;
2387 #endif
2388           if (colb == -1) {
2389             m = 0;
2390           } else {
2391             colb = colb/bs;
2392             rows = B_cj + B_ci[colb];
2393             m    = B_ci[colb+1] - B_ci[colb];
2394           }
2395         }
2396         /* loop over columns marking them in rowhit */
2397         for (k=0; k<m; k++) {
2398           rowhit[*rows++] = col + 1;
2399         }
2400       }
2401 
2402       /* count the number of hits */
2403       nrows = 0;
2404       for (j=0; j<M; j++) {
2405         if (rowhit[j]) nrows++;
2406       }
2407       c->nrows[i]         = nrows;
2408       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2409       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2410       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2411       nrows = 0;
2412       for (j=0; j<M; j++) {
2413         if (rowhit[j]) {
2414           c->rows[i][nrows]           = j;
2415           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2416           nrows++;
2417         }
2418       }
2419     } else {/*-------------------------------------------------------------------------------*/
2420       /* slow version, using rowhit as a linked list */
2421       PetscInt currentcol,fm,mfm;
2422       rowhit[M] = M;
2423       nrows     = 0;
2424       /* loop over columns*/
2425       for (j=0; j<nctot; j++) {
2426         if (ctype == IS_COLORING_GHOSTED) {
2427           col = ltog[cols[j]];
2428         } else {
2429           col  = cols[j];
2430         }
2431         if (col >= cstart && col < cend) {
2432           /* column is in diagonal block of matrix */
2433           rows = A_cj + A_ci[col-cstart];
2434           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2435         } else {
2436 #if defined (PETSC_USE_CTABLE)
2437           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2438           colb --;
2439 #else
2440           colb = baij->colmap[col] - 1;
2441 #endif
2442           if (colb == -1) {
2443             m = 0;
2444           } else {
2445             colb = colb/bs;
2446             rows = B_cj + B_ci[colb];
2447             m    = B_ci[colb+1] - B_ci[colb];
2448           }
2449         }
2450 
2451         /* loop over columns marking them in rowhit */
2452         fm    = M; /* fm points to first entry in linked list */
2453         for (k=0; k<m; k++) {
2454           currentcol = *rows++;
2455 	  /* is it already in the list? */
2456           do {
2457             mfm  = fm;
2458             fm   = rowhit[fm];
2459           } while (fm < currentcol);
2460           /* not in list so add it */
2461           if (fm != currentcol) {
2462             nrows++;
2463             columnsforrow[currentcol] = col;
2464             /* next three lines insert new entry into linked list */
2465             rowhit[mfm]               = currentcol;
2466             rowhit[currentcol]        = fm;
2467             fm                        = currentcol;
2468             /* fm points to present position in list since we know the columns are sorted */
2469           } else {
2470             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2471           }
2472         }
2473       }
2474       c->nrows[i]         = nrows;
2475       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2476       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2477       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2478       /* now store the linked list of rows into c->rows[i] */
2479       nrows = 0;
2480       fm    = rowhit[M];
2481       do {
2482         c->rows[i][nrows]            = fm;
2483         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2484         fm                           = rowhit[fm];
2485       } while (fm < M);
2486     } /* ---------------------------------------------------------------------------------------*/
2487     ierr = PetscFree(cols);CHKERRQ(ierr);
2488   }
2489 
2490   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2491   /*
2492        vscale will contain the "diagonal" on processor scalings followed by the off processor
2493   */
2494   if (ctype == IS_COLORING_GLOBAL) {
2495     PetscInt *garray;
2496     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2497     for (i=0; i<baij->B->cmap->n/bs; i++) {
2498       for (j=0; j<bs; j++) {
2499         garray[i*bs+j] = bs*baij->garray[i]+j;
2500       }
2501     }
2502     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2503     ierr = PetscFree(garray);CHKERRQ(ierr);
2504     CHKMEMQ;
2505     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2506     for (k=0; k<c->ncolors; k++) {
2507       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2508       for (l=0; l<c->nrows[k]; l++) {
2509         col = c->columnsforrow[k][l];
2510         if (col >= cstart && col < cend) {
2511           /* column is in diagonal block of matrix */
2512           colb = col - cstart;
2513         } else {
2514           /* column  is in "off-processor" part */
2515 #if defined (PETSC_USE_CTABLE)
2516           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2517           colb --;
2518 #else
2519           colb = baij->colmap[col] - 1;
2520 #endif
2521           colb = colb/bs;
2522           colb += cend - cstart;
2523         }
2524         c->vscaleforrow[k][l] = colb;
2525       }
2526     }
2527   } else if (ctype == IS_COLORING_GHOSTED) {
2528     /* Get gtol mapping */
2529     PetscInt N = mat->cmap->N, *gtol;
2530     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2531     for (i=0; i<N; i++) gtol[i] = -1;
2532     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2533 
2534     c->vscale = 0; /* will be created in MatFDColoringApply() */
2535     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2536     for (k=0; k<c->ncolors; k++) {
2537       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2538       for (l=0; l<c->nrows[k]; l++) {
2539         col = c->columnsforrow[k][l];      /* global column index */
2540         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2541       }
2542     }
2543     ierr = PetscFree(gtol);CHKERRQ(ierr);
2544   }
2545   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2546 
2547   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2548   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2549   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2550   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2551   CHKMEMQ;
2552   PetscFunctionReturn(0);
2553 }
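
/*
   Usage sketch (an illustrative assumption): the structure built above is normally
   created from a matrix coloring and then used to assemble a finite-difference Jacobian.
   J, FormFunction and ctx are hypothetical.

       ISColoring    iscoloring;
       MatFDColoring fdcoloring;
       ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
       ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
       ierr = MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))FormFunction,ctx);CHKERRQ(ierr);
       ierr = MatFDColoringSetFromOptions(fdcoloring);CHKERRQ(ierr);
       ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
*/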
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2557 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2558 {
2559   Mat            B;
2560   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2561   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2562   Mat_SeqAIJ     *b;
2563   PetscErrorCode ierr;
2564   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2565   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2566   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2567 
2568   PetscFunctionBegin;
2569   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2570   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2571 
2572   /* ----------------------------------------------------------------
2573      Tell every processor the number of nonzeros per row
2574   */
2575   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2576   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2577     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2578   }
2579   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2580   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2581   displs     = recvcounts + size;
2582   for (i=0; i<size; i++) {
2583     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2584     displs[i]     = A->rmap->range[i]/bs;
2585   }
2586 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2587   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2588 #else
2589   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2590 #endif
2591   /* ---------------------------------------------------------------
2592      Create the sequential matrix of the same type as the local block diagonal
2593   */
2594   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2595   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2596   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2597   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2598   b = (Mat_SeqAIJ *)B->data;
2599 
2600   /*--------------------------------------------------------------------
2601     Copy my part of matrix column indices over
2602   */
2603   sendcount  = ad->nz + bd->nz;
2604   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2605   a_jsendbuf = ad->j;
2606   b_jsendbuf = bd->j;
2607   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2608   cnt        = 0;
2609   for (i=0; i<n; i++) {
2610 
2611     /* put in lower diagonal portion */
2612     m = bd->i[i+1] - bd->i[i];
2613     while (m > 0) {
2614       /* is it above diagonal (in bd (compressed) numbering) */
2615       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2616       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2617       m--;
2618     }
2619 
2620     /* put in diagonal portion */
2621     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2622       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2623     }
2624 
2625     /* put in upper diagonal portion */
2626     while (m-- > 0) {
2627       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2628     }
2629   }
2630   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2631 
2632   /*--------------------------------------------------------------------
2633     Gather all column indices to all processors
2634   */
2635   for (i=0; i<size; i++) {
2636     recvcounts[i] = 0;
2637     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2638       recvcounts[i] += lens[j];
2639     }
2640   }
2641   displs[0]  = 0;
2642   for (i=1; i<size; i++) {
2643     displs[i] = displs[i-1] + recvcounts[i-1];
2644   }
2645 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2646   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2647 #else
2648   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2649 #endif
2650   /*--------------------------------------------------------------------
2651     Assemble the matrix into useable form (note numerical values not yet set)
2652   */
2653   /* set the b->ilen (length of each row) values */
2654   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2655   /* set the b->i indices */
2656   b->i[0] = 0;
2657   for (i=1; i<=A->rmap->N/bs; i++) {
2658     b->i[i] = b->i[i-1] + lens[i-1];
2659   }
2660   ierr = PetscFree(lens);CHKERRQ(ierr);
2661   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2662   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2663   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2664 
2665   if (A->symmetric){
2666     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2667   } else if (A->hermitian) {
2668     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2669   } else if (A->structurally_symmetric) {
2670     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2671   }
2672   *newmat = B;
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatSOR_MPIBAIJ"
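/*
   Local (processor-block) SOR: each sweep scatters the current solution into mat->lvec
   (the ghost values needed by the off-diagonal block B), forms the modified right-hand
   side bb1 = bb - B*lvec, and then applies the sequential SOR kernel of the diagonal
   block A to bb1.  Only the SOR_LOCAL_* variants are supported; a true global parallel
   SOR is not implemented here.
*/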
2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2679 {
2680   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2681   PetscErrorCode ierr;
2682   Vec            bb1 = 0;
2683 
2684   PetscFunctionBegin;
2685   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2686     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2687   }
2688 
2689   if (flag == SOR_APPLY_UPPER) {
2690     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2691     PetscFunctionReturn(0);
2692   }
2693 
2694   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2695     if (flag & SOR_ZERO_INITIAL_GUESS) {
2696       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2697       its--;
2698     }
2699 
2700     while (its--) {
2701       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2702       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703 
2704       /* update rhs: bb1 = bb - B*x */
2705       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2706       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2707 
2708       /* local sweep */
2709       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2710     }
2711   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2712     if (flag & SOR_ZERO_INITIAL_GUESS) {
2713       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2714       its--;
2715     }
2716     while (its--) {
2717       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719 
2720       /* update rhs: bb1 = bb - B*x */
2721       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2722       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2723 
2724       /* local sweep */
2725       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2726     }
2727   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2728     if (flag & SOR_ZERO_INITIAL_GUESS) {
2729       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2730       its--;
2731     }
2732     while (its--) {
2733       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2734       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2735 
2736       /* update rhs: bb1 = bb - B*x */
2737       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2738       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2739 
2740       /* local sweep */
2741       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2742     }
2743   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2744 
2745   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2753 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2754 {
2755   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 
2764 /* -------------------------------------------------------------------*/
2765 static struct _MatOps MatOps_Values = {
2766        MatSetValues_MPIBAIJ,
2767        MatGetRow_MPIBAIJ,
2768        MatRestoreRow_MPIBAIJ,
2769        MatMult_MPIBAIJ,
2770 /* 4*/ MatMultAdd_MPIBAIJ,
2771        MatMultTranspose_MPIBAIJ,
2772        MatMultTransposeAdd_MPIBAIJ,
2773        0,
2774        0,
2775        0,
2776 /*10*/ 0,
2777        0,
2778        0,
2779        MatSOR_MPIBAIJ,
2780        MatTranspose_MPIBAIJ,
2781 /*15*/ MatGetInfo_MPIBAIJ,
2782        MatEqual_MPIBAIJ,
2783        MatGetDiagonal_MPIBAIJ,
2784        MatDiagonalScale_MPIBAIJ,
2785        MatNorm_MPIBAIJ,
2786 /*20*/ MatAssemblyBegin_MPIBAIJ,
2787        MatAssemblyEnd_MPIBAIJ,
2788        MatSetOption_MPIBAIJ,
2789        MatZeroEntries_MPIBAIJ,
2790 /*24*/ MatZeroRows_MPIBAIJ,
2791        0,
2792        0,
2793        0,
2794        0,
2795 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2796        0,
2797        0,
2798        0,
2799        0,
2800 /*34*/ MatDuplicate_MPIBAIJ,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*39*/ MatAXPY_MPIBAIJ,
2806        MatGetSubMatrices_MPIBAIJ,
2807        MatIncreaseOverlap_MPIBAIJ,
2808        MatGetValues_MPIBAIJ,
2809        MatCopy_MPIBAIJ,
2810 /*44*/ 0,
2811        MatScale_MPIBAIJ,
2812        0,
2813        0,
2814        0,
2815 /*49*/ MatSetBlockSize_MPIBAIJ,
2816        0,
2817        0,
2818        0,
2819        0,
2820 /*54*/ MatFDColoringCreate_MPIBAIJ,
2821        0,
2822        MatSetUnfactored_MPIBAIJ,
2823        MatPermute_MPIBAIJ,
2824        MatSetValuesBlocked_MPIBAIJ,
2825 /*59*/ MatGetSubMatrix_MPIBAIJ,
2826        MatDestroy_MPIBAIJ,
2827        MatView_MPIBAIJ,
2828        0,
2829        0,
2830 /*64*/ 0,
2831        0,
2832        0,
2833        0,
2834        0,
2835 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*74*/ 0,
2841        MatFDColoringApply_BAIJ,
2842        0,
2843        0,
2844        0,
2845 /*79*/ 0,
2846        0,
2847        0,
2848        0,
2849        MatLoad_MPIBAIJ,
2850 /*84*/ 0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*89*/ 0,
2856        0,
2857        0,
2858        0,
2859        0,
2860 /*94*/ 0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*99*/ 0,
2866        0,
2867        0,
2868        0,
2869        0,
2870 /*104*/0,
2871        MatRealPart_MPIBAIJ,
2872        MatImaginaryPart_MPIBAIJ,
2873        0,
2874        0,
2875 /*109*/0,
2876        0,
2877        0,
2878        0,
2879        0,
2880 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2881        0,
2882        MatGetGhosts_MPIBAIJ,
2883        0,
2884        0,
2885 /*119*/0,
2886        0,
2887        0,
2888        0,
2889        0,
2890 /*124*/0,
2891        0,
2892        MatInvertBlockDiagonal_MPIBAIJ
2893 };
2894 
2895 EXTERN_C_BEGIN
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2898 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2899 {
2900   PetscFunctionBegin;
2901   *a = ((Mat_MPIBAIJ *)A->data)->A;
2902   PetscFunctionReturn(0);
2903 }
2904 EXTERN_C_END
2905 
2906 EXTERN_C_BEGIN
2907 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2908 EXTERN_C_END
2909 
2910 EXTERN_C_BEGIN
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2913 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2914 {
2915   PetscInt       m,rstart,cstart,cend;
2916   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2917   const PetscInt *JJ=0;
2918   PetscScalar    *values=0;
2919   PetscErrorCode ierr;
2920 
2921   PetscFunctionBegin;
2922 
2923   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2924   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2925   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2926   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2927   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2928   m      = B->rmap->n/bs;
2929   rstart = B->rmap->rstart/bs;
2930   cstart = B->cmap->rstart/bs;
2931   cend   = B->cmap->rend/bs;
2932 
2933   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2934   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2935   for (i=0; i<m; i++) {
2936     nz = ii[i+1] - ii[i];
2937     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2938     nz_max = PetscMax(nz_max,nz);
2939     JJ  = jj + ii[i];
2940     for (j=0; j<nz; j++) {
2941       if (*JJ >= cstart) break;
2942       JJ++;
2943     }
2944     d = 0;
2945     for (; j<nz; j++) {
2946       if (*JJ++ >= cend) break;
2947       d++;
2948     }
2949     d_nnz[i] = d;
2950     o_nnz[i] = nz - d;
2951   }
2952   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2953   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2954 
2955   values = (PetscScalar*)V;
2956   if (!values) {
2957     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2958     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2959   }
2960   for (i=0; i<m; i++) {
2961     PetscInt          row    = i + rstart;
2962     PetscInt          ncols  = ii[i+1] - ii[i];
2963     const PetscInt    *icols = jj + ii[i];
2964     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2965     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2966   }
2967 
2968   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2969   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2970   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2972   PetscFunctionReturn(0);
2973 }
2974 EXTERN_C_END
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2978 /*@C
2979    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2980    (block compressed row).
2981 
2982    Collective on MPI_Comm
2983 
2984    Input Parameters:
2985 +  A - the matrix
2986 .  bs - the block size
2987 .  i - the indices into j for the start of each local row (starts with zero)
2988 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2989 -  v - optional values in the matrix
2990 
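   Example (a minimal sketch; the block size, row pointers, column indices, and values below are
   purely illustrative and assume A has already been created with MatCreate()/MatSetSizes()/MatSetType()):
.vb
   PetscInt    bs  = 2;
   PetscInt    i[] = {0,2,3};      /* two local block rows containing 2 and 1 blocks */
   PetscInt    j[] = {0,1,1};      /* global block column index of each block        */
   PetscScalar v[12] = {0};        /* bs*bs values for each of the 3 blocks          */
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,v);CHKERRQ(ierr);
.ve
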
2991    Level: developer
2992 
2993 .keywords: matrix, aij, compressed row, sparse, parallel
2994 
2995 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2996 @*/
2997 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2998 {
2999   PetscErrorCode ierr;
3000 
3001   PetscFunctionBegin;
3002   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3003   PetscValidType(B,1);
3004   PetscValidLogicalCollectiveInt(B,bs,2);
3005   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 EXTERN_C_BEGIN
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3012 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3013 {
3014   Mat_MPIBAIJ    *b;
3015   PetscErrorCode ierr;
3016   PetscInt       i, newbs = PetscAbs(bs);
3017 
3018   PetscFunctionBegin;
3019   if (bs < 0) {
3020     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3021       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3022     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3023     bs   = PetscAbs(bs);
3024   }
3025   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3026   bs = newbs;
3027 
3028 
3029   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3030   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3031   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3032   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3033   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3034 
3035   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3036   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3037   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3038   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3039 
3040   if (d_nnz) {
3041     for (i=0; i<B->rmap->n/bs; i++) {
3042       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3043     }
3044   }
3045   if (o_nnz) {
3046     for (i=0; i<B->rmap->n/bs; i++) {
3047       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3048     }
3049   }
3050 
3051   b = (Mat_MPIBAIJ*)B->data;
3052   b->bs2 = bs*bs;
3053   b->mbs = B->rmap->n/bs;
3054   b->nbs = B->cmap->n/bs;
3055   b->Mbs = B->rmap->N/bs;
3056   b->Nbs = B->cmap->N/bs;
3057 
3058   for (i=0; i<=b->size; i++) {
3059     b->rangebs[i] = B->rmap->range[i]/bs;
3060   }
3061   b->rstartbs = B->rmap->rstart/bs;
3062   b->rendbs   = B->rmap->rend/bs;
3063   b->cstartbs = B->cmap->rstart/bs;
3064   b->cendbs   = B->cmap->rend/bs;
3065 
3066   if (!B->preallocated) {
3067     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3068     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3069     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3070     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3071     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3072     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3073     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3074     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3075     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3076   }
3077 
3078   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3079   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3080   B->preallocated = PETSC_TRUE;
3081   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3082   PetscFunctionReturn(0);
3083 }
3084 EXTERN_C_END
3085 
3086 EXTERN_C_BEGIN
3087 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3088 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3089 EXTERN_C_END
3090 
3091 
3092 EXTERN_C_BEGIN
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3095 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3096 {
3097   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3098   PetscErrorCode ierr;
3099   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3100   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3101   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3102 
3103   PetscFunctionBegin;
3104   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3105   ii[0] = 0;
3106   CHKMEMQ;
3107   for (i=0; i<M; i++) {
3108     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3109     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3110     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3111     /* remove one from count if matrix has diagonal */
3112     for (j=id[i]; j<id[i+1]; j++) {
3113       if (jd[j] == i) {ii[i+1]--;break;}
3114     }
3115   CHKMEMQ;
3116   }
3117   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3118   cnt = 0;
3119   for (i=0; i<M; i++) {
3120     for (j=io[i]; j<io[i+1]; j++) {
3121       if (garray[jo[j]] > rstart) break;
3122       jj[cnt++] = garray[jo[j]];
3123   CHKMEMQ;
3124     }
3125     for (k=id[i]; k<id[i+1]; k++) {
3126       if (jd[k] != i) {
3127         jj[cnt++] = rstart + jd[k];
3128   CHKMEMQ;
3129       }
3130     }
3131     for (;j<io[i+1]; j++) {
3132       jj[cnt++] = garray[jo[j]];
3133   CHKMEMQ;
3134     }
3135   }
3136   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3137   PetscFunctionReturn(0);
3138 }
3139 EXTERN_C_END
3140 
3141 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3142 EXTERN_C_BEGIN
3143 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3144 EXTERN_C_END
3145 
3146 EXTERN_C_BEGIN
3147 #undef __FUNCT__
3148 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3149 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3150 {
3151   PetscErrorCode ierr;
3152   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3153   Mat            B;
3154   Mat_MPIAIJ     *b;
3155 
3156   PetscFunctionBegin;
3157   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3158 
3159   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3160   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3161   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3162   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3163   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3164   b = (Mat_MPIAIJ*) B->data;
3165 
3166   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3167   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3168   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3169   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3170   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3171   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3172   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3173   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3174   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3175   if (reuse == MAT_REUSE_MATRIX) {
3176     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3177   } else {
3178    *newmat = B;
3179   }
3180   PetscFunctionReturn(0);
3181 }
3182 EXTERN_C_END
3183 
3184 EXTERN_C_BEGIN
3185 #if defined(PETSC_HAVE_MUMPS)
3186 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3187 #endif
3188 EXTERN_C_END
3189 
3190 /*MC
3191    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3192 
3193    Options Database Keys:
3194 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3195 . -mat_block_size <bs> - set the blocksize used to store the matrix
3196 - -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> sets the hash table size factor (see MatMPIBAIJSetHashTableFactor())
3197 
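  Example (a minimal sketch; A, comm, and the size/preallocation values m, n, M, N, bs, d_nz and
  o_nz are assumed to be declared and chosen by the caller):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
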
3198   Level: beginner
3199 
3200 .seealso: MatCreateMPIBAIJ()
3201 M*/
3202 
3203 EXTERN_C_BEGIN
3204 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3205 EXTERN_C_END
3206 
3207 EXTERN_C_BEGIN
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatCreate_MPIBAIJ"
3210 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3211 {
3212   Mat_MPIBAIJ    *b;
3213   PetscErrorCode ierr;
3214   PetscBool      flg;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3218   B->data = (void*)b;
3219 
3220   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3221   B->assembled  = PETSC_FALSE;
3222 
3223   B->insertmode = NOT_SET_VALUES;
3224   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3225   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3226 
3227   /* build local table of row and column ownerships */
3228   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3229 
3230   /* build cache for off array entries formed */
3231   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3232   b->donotstash  = PETSC_FALSE;
3233   b->colmap      = PETSC_NULL;
3234   b->garray      = PETSC_NULL;
3235   b->roworiented = PETSC_TRUE;
3236 
3237   /* stuff used in block assembly */
3238   b->barray       = 0;
3239 
3240   /* stuff used for matrix vector multiply */
3241   b->lvec         = 0;
3242   b->Mvctx        = 0;
3243 
3244   /* stuff for MatGetRow() */
3245   b->rowindices   = 0;
3246   b->rowvalues    = 0;
3247   b->getrowactive = PETSC_FALSE;
3248 
3249   /* hash table stuff */
3250   b->ht           = 0;
3251   b->hd           = 0;
3252   b->ht_size      = 0;
3253   b->ht_flag      = PETSC_FALSE;
3254   b->ht_fact      = 0;
3255   b->ht_total_ct  = 0;
3256   b->ht_insert_ct = 0;
3257 
3258   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3259   b->ijonly       = PETSC_FALSE;
3260 
3261   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3262     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3263     if (flg) {
3264       PetscReal fact = 1.39;
3265       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3266       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3267       if (fact <= 1.0) fact = 1.39;
3268       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3269       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3270     }
3271   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3272 
3273 #if defined(PETSC_HAVE_MUMPS)
3274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3275 #endif
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3277                                      "MatConvert_MPIBAIJ_MPIAdj",
3278                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3280                                      "MatConvert_MPIBAIJ_MPIAIJ",
3281                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3283                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3284                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3286                                      "MatStoreValues_MPIBAIJ",
3287                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3289                                      "MatRetrieveValues_MPIBAIJ",
3290                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3292                                      "MatGetDiagonalBlock_MPIBAIJ",
3293                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3295                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3296                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3297   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3298 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3299 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3301                                      "MatDiagonalScaleLocal_MPIBAIJ",
3302                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3304                                      "MatSetHashTableFactor_MPIBAIJ",
3305                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3307                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3308                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3309   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3310   PetscFunctionReturn(0);
3311 }
3312 EXTERN_C_END
3313 
3314 /*MC
3315    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3316 
3317    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3318    and MATMPIBAIJ otherwise.
3319 
3320    Options Database Keys:
3321 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3322 
3323   Level: beginner
3324 
3325 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3326 M*/
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3330 /*@C
3331    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3332    (block compressed row).  For good matrix assembly performance
3333    the user should preallocate the matrix storage by setting the parameters
3334    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3335    performance can be increased by more than a factor of 50.
3336 
3337    Collective on Mat
3338 
3339    Input Parameters:
3340 +  A - the matrix
3341 .  bs   - size of block
3342 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3343            submatrix  (same for all local rows)
3344 .  d_nnz - array containing the number of block nonzeros in the various block rows
3345            in the diagonal portion of the local submatrix (possibly different for each block
3346            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3347 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3348            submatrix (same for all local rows).
3349 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3350            off-diagonal portion of the local submatrix (possibly different for
3351            each block row) or PETSC_NULL.
3352 
3353    If the *_nnz parameter is given then the *_nz parameter is ignored
3354 
3355    Options Database Keys:
3356 +   -mat_block_size - size of the blocks to use
3357 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> sets the hash table size factor
3358 
3359    Notes:
3360    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3361    then it must be used on all processors that share the object for that argument.
3362 
3363    Storage Information:
3364    For a square global matrix we define each processor's diagonal portion
3365    to be its local rows and the corresponding columns (a square submatrix);
3366    each processor's off-diagonal portion encompasses the remainder of the
3367    local matrix (a rectangular submatrix).
3368 
3369    The user can specify preallocated storage for the diagonal part of
3370    the local submatrix with either d_nz or d_nnz (not both).  Set
3371    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3372    memory allocation.  Likewise, specify preallocated storage for the
3373    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3374 
3375    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3376    the figure below we depict these three local rows and all columns (0-11).
3377 
3378 .vb
3379            0 1 2 3 4 5 6 7 8 9 10 11
3380           -------------------
3381    row 3  |  o o o d d d o o o o o o
3382    row 4  |  o o o d d d o o o o o o
3383    row 5  |  o o o d d d o o o o o o
3384           -------------------
3385 .ve
3386 
3387    Thus, any entries in the d locations are stored in the d (diagonal)
3388    submatrix, and any entries in the o locations are stored in the
3389    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3390    stored simply in the MATSEQBAIJ format for compressed row storage.
3391 
3392    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3393    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3394    In general, for PDE problems in which most nonzeros are near the diagonal,
3395    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3396    or you will get TERRIBLE performance; see the users' manual chapter on
3397    matrices.
3398 
3399    You can call MatGetInfo() to get information on how effective the preallocation was;
3400    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3401    You can also run with the option -info and look for messages with the string
3402    malloc in them to see if additional memory allocation was needed.
3403 
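   As a concrete (purely illustrative) reading of the figure above: with bs=3 the pictured block
   row has one diagonal block (columns 3-5) and three off-diagonal blocks (columns 0-2, 6-8 and
   9-11), so if every location shown were nonzero one could preallocate with
.vb
   ierr = MatMPIBAIJSetPreallocation(A,3,1,PETSC_NULL,3,PETSC_NULL);CHKERRQ(ierr);
.ve
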
3404    Level: intermediate
3405 
3406 .keywords: matrix, block, aij, compressed row, sparse, parallel
3407 
3408 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3409 @*/
3410 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3411 {
3412   PetscErrorCode ierr;
3413 
3414   PetscFunctionBegin;
3415   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3416   PetscValidType(B,1);
3417   PetscValidLogicalCollectiveInt(B,bs,2);
3418   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 #undef __FUNCT__
3423 #define __FUNCT__ "MatCreateMPIBAIJ"
3424 /*@C
3425    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3426    (block compressed row).  For good matrix assembly performance
3427    the user should preallocate the matrix storage by setting the parameters
3428    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3429    performance can be increased by more than a factor of 50.
3430 
3431    Collective on MPI_Comm
3432 
3433    Input Parameters:
3434 +  comm - MPI communicator
3435 .  bs   - size of block
3436 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3437            This value should be the same as the local size used in creating the
3438            y vector for the matrix-vector product y = Ax.
3439 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3440            This value should be the same as the local size used in creating the
3441            x vector for the matrix-vector product y = Ax.
3442 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3443 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3444 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3445            submatrix  (same for all local rows)
3446 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3447            in the diagonal portion of the local submatrix (possibly different for each block
3448            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3449 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3450            submatrix (same for all local rows).
3451 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3452            off-diagonal portion of the local submatrix (possibly different for
3453            each block row) or PETSC_NULL.
3454 
3455    Output Parameter:
3456 .  A - the matrix
3457 
3458    Options Database Keys:
3459 +   -mat_block_size - size of the blocks to use
3460 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> sets the hash table size factor
3461 
3462    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3463    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3464    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3465 
3466    Notes:
3467    If the *_nnz parameter is given then the *_nz parameter is ignored
3468 
3469    A nonzero block is any block that has 1 or more nonzeros in it
3470 
3471    The user MUST specify either the local or global matrix dimensions
3472    (possibly both).
3473 
3474    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3475    then it must be used on all processors that share the object for that argument.
3476 
3477    Storage Information:
3478    For a square global matrix we define each processor's diagonal portion
3479    to be its local rows and the corresponding columns (a square submatrix);
3480    each processor's off-diagonal portion encompasses the remainder of the
3481    local matrix (a rectangular submatrix).
3482 
3483    The user can specify preallocated storage for the diagonal part of
3484    the local submatrix with either d_nz or d_nnz (not both).  Set
3485    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3486    memory allocation.  Likewise, specify preallocated storage for the
3487    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3488 
3489    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3490    the figure below we depict these three local rows and all columns (0-11).
3491 
3492 .vb
3493            0 1 2 3 4 5 6 7 8 9 10 11
3494           -------------------
3495    row 3  |  o o o d d d o o o o o o
3496    row 4  |  o o o d d d o o o o o o
3497    row 5  |  o o o d d d o o o o o o
3498           -------------------
3499 .ve
3500 
3501    Thus, any entries in the d locations are stored in the d (diagonal)
3502    submatrix, and any entries in the o locations are stored in the
3503    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3504    stored simply in the MATSEQBAIJ format for compressed row storage.
3505 
3506    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3507    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3508    In general, for PDE problems in which most nonzeros are near the diagonal,
3509    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3510    or you will get TERRIBLE performance; see the users' manual chapter on
3511    matrices.
3512 
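   Example (a minimal sketch; comm, bs, the local sizes m and n, and the preallocation counts d_nz
   and o_nz are assumed to be chosen by the caller):
.vb
   ierr = MatCreateMPIBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
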
3513    Level: intermediate
3514 
3515 .keywords: matrix, block, aij, compressed row, sparse, parallel
3516 
3517 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3518 @*/
3519 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3520 {
3521   PetscErrorCode ierr;
3522   PetscMPIInt    size;
3523 
3524   PetscFunctionBegin;
3525   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3526   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3528   if (size > 1) {
3529     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3530     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3531   } else {
3532     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3533     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3534   }
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 #undef __FUNCT__
3539 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3540 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3541 {
3542   Mat            mat;
3543   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3544   PetscErrorCode ierr;
3545   PetscInt       len=0;
3546 
3547   PetscFunctionBegin;
3548   *newmat       = 0;
3549   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3550   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3551   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3552   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3553 
3554   mat->factortype   = matin->factortype;
3555   mat->preallocated = PETSC_TRUE;
3556   mat->assembled    = PETSC_TRUE;
3557   mat->insertmode   = NOT_SET_VALUES;
3558 
3559   a      = (Mat_MPIBAIJ*)mat->data;
3560   mat->rmap->bs  = matin->rmap->bs;
3561   a->bs2   = oldmat->bs2;
3562   a->mbs   = oldmat->mbs;
3563   a->nbs   = oldmat->nbs;
3564   a->Mbs   = oldmat->Mbs;
3565   a->Nbs   = oldmat->Nbs;
3566 
3567   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3568   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3569 
3570   a->size         = oldmat->size;
3571   a->rank         = oldmat->rank;
3572   a->donotstash   = oldmat->donotstash;
3573   a->roworiented  = oldmat->roworiented;
3574   a->rowindices   = 0;
3575   a->rowvalues    = 0;
3576   a->getrowactive = PETSC_FALSE;
3577   a->barray       = 0;
3578   a->rstartbs     = oldmat->rstartbs;
3579   a->rendbs       = oldmat->rendbs;
3580   a->cstartbs     = oldmat->cstartbs;
3581   a->cendbs       = oldmat->cendbs;
3582 
3583   /* hash table stuff */
3584   a->ht           = 0;
3585   a->hd           = 0;
3586   a->ht_size      = 0;
3587   a->ht_flag      = oldmat->ht_flag;
3588   a->ht_fact      = oldmat->ht_fact;
3589   a->ht_total_ct  = 0;
3590   a->ht_insert_ct = 0;
3591 
3592   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3593   if (oldmat->colmap) {
3594 #if defined (PETSC_USE_CTABLE)
3595   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3596 #else
3597   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3598   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3599   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3600 #endif
3601   } else a->colmap = 0;
3602 
3603   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3604     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3605     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3606     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3607   } else a->garray = 0;
3608 
3609   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3610   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3611   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3612   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3613   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3614 
3615   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3616   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3617   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3618   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3619   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3620   *newmat = mat;
3621 
3622   PetscFunctionReturn(0);
3623 }
3624 
3625 #undef __FUNCT__
3626 #define __FUNCT__ "MatLoad_MPIBAIJ"
3627 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3628 {
3629   PetscErrorCode ierr;
3630   int            fd;
3631   PetscInt       i,nz,j,rstart,rend;
3632   PetscScalar    *vals,*buf;
3633   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3634   MPI_Status     status;
3635   PetscMPIInt    rank,size,maxnz;
3636   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3637   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3638   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3639   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3640   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3641   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3642 
3643   PetscFunctionBegin;
3644   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3645     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3646   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3647 
3648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3650   if (!rank) {
3651     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3652     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3653     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3654   }
3655 
3656   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3657 
3658   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3659   M = header[1]; N = header[2];
3660 
3661   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3662   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3663   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3664 
3665   /* If global sizes are set, check if they are consistent with that given in the file */
3666   if (sizesset) {
3667     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3668   }
3669   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3670   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3671 
3672   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3673 
3674   /*
3675      This code adds extra rows to make sure the number of rows is
3676      divisible by the blocksize
3677   */
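  /* e.g. with M = 10 and bs = 3: Mbs = 3 and extra_rows = 3 - 10 + 9 = 2, so Mbs is bumped to 4
     and two padding rows are appended; each padding row receives a single unit entry on the
     diagonal when the values are read in below */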
3678   Mbs        = M/bs;
3679   extra_rows = bs - M + bs*Mbs;
3680   if (extra_rows == bs) extra_rows = 0;
3681   else                  Mbs++;
3682   if (extra_rows && !rank) {
3683     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3684   }
3685 
3686   /* determine ownership of all rows */
3687   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3688     mbs        = Mbs/size + ((Mbs % size) > rank);
3689     m          = mbs*bs;
3690   } else { /* User set */
3691     m          = newmat->rmap->n;
3692     mbs        = m/bs;
3693   }
3694   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3695   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3696 
3697   /* process 0 needs enough room for process with most rows */
3698   if (!rank) {
3699     mmax = rowners[1];
3700     for (i=2; i<size; i++) {
3701       mmax = PetscMax(mmax,rowners[i]);
3702     }
3703     mmax*=bs;
3704   } else mmax = m;
3705 
3706   rowners[0] = 0;
3707   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3708   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3709   rstart = rowners[rank];
3710   rend   = rowners[rank+1];
3711 
3712   /* distribute row lengths to all processors */
3713   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3714   if (!rank) {
3715     mend = m;
3716     if (size == 1) mend = mend - extra_rows;
3717     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3718     for (j=mend; j<m; j++) locrowlens[j] = 1;
3719     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3720     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3721     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3722     for (j=0; j<m; j++) {
3723       procsnz[0] += locrowlens[j];
3724     }
3725     for (i=1; i<size; i++) {
3726       mend = browners[i+1] - browners[i];
3727       if (i == size-1) mend = mend - extra_rows;
3728       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3729       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3730       /* calculate the number of nonzeros on each processor */
3731       for (j=0; j<browners[i+1]-browners[i]; j++) {
3732         procsnz[i] += rowlengths[j];
3733       }
3734       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3735     }
3736     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3737   } else {
3738     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3739   }
3740 
3741   if (!rank) {
3742     /* determine max buffer needed and allocate it */
3743     maxnz = procsnz[0];
3744     for (i=1; i<size; i++) {
3745       maxnz = PetscMax(maxnz,procsnz[i]);
3746     }
3747     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3748 
3749     /* read in my part of the matrix column indices  */
3750     nz     = procsnz[0];
3751     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3752     mycols = ibuf;
3753     if (size == 1)  nz -= extra_rows;
3754     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3755     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3756 
3757     /* read in the column indices for every other process (except the last) and ship them off */
3758     for (i=1; i<size-1; i++) {
3759       nz   = procsnz[i];
3760       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3761       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3762     }
3763     /* read in the stuff for the last proc */
3764     if (size != 1) {
3765       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3766       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3767       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3768       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3769     }
3770     ierr = PetscFree(cols);CHKERRQ(ierr);
3771   } else {
3772     /* determine buffer space needed for message */
3773     nz = 0;
3774     for (i=0; i<m; i++) {
3775       nz += locrowlens[i];
3776     }
3777     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3778     mycols = ibuf;
3779     /* receive message of column indices*/
3780     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3781     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3782     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3783   }
3784 
3785   /* loop over local rows, determining number of off diagonal entries */
3786   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3787   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3788   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3789   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3790   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3791   rowcount = 0; nzcount = 0;
3792   for (i=0; i<mbs; i++) {
3793     dcount  = 0;
3794     odcount = 0;
3795     for (j=0; j<bs; j++) {
3796       kmax = locrowlens[rowcount];
3797       for (k=0; k<kmax; k++) {
3798         tmp = mycols[nzcount++]/bs;
3799         if (!mask[tmp]) {
3800           mask[tmp] = 1;
3801           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3802           else masked1[dcount++] = tmp;
3803         }
3804       }
3805       rowcount++;
3806     }
3807 
3808     dlens[i]  = dcount;
3809     odlens[i] = odcount;
3810 
3811     /* zero out the mask elements we set */
3812     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3813     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3814   }
3815 
3816 
3817   if (!sizesset) {
3818     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3819   }
3820   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3821 
3822   if (!rank) {
3823     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3824     /* read in my part of the matrix numerical values  */
3825     nz = procsnz[0];
3826     vals = buf;
3827     mycols = ibuf;
3828     if (size == 1)  nz -= extra_rows;
3829     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3830     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3831 
3832     /* insert into matrix */
3833     jj      = rstart*bs;
3834     for (i=0; i<m; i++) {
3835       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3836       mycols += locrowlens[i];
3837       vals   += locrowlens[i];
3838       jj++;
3839     }
3840     /* read in other processors (except the last one) and ship out */
3841     for (i=1; i<size-1; i++) {
3842       nz   = procsnz[i];
3843       vals = buf;
3844       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3845       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3846     }
3847     /* the last proc */
3848     if (size != 1){
3849       nz   = procsnz[i] - extra_rows;
3850       vals = buf;
3851       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3852       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3853       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3854     }
3855     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3856   } else {
3857     /* receive numeric values */
3858     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3859 
3860     /* receive message of values*/
3861     vals   = buf;
3862     mycols = ibuf;
3863     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3864     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3865     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3866 
3867     /* insert into matrix */
3868     jj      = rstart*bs;
3869     for (i=0; i<m; i++) {
3870       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3871       mycols += locrowlens[i];
3872       vals   += locrowlens[i];
3873       jj++;
3874     }
3875   }
3876   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3877   ierr = PetscFree(buf);CHKERRQ(ierr);
3878   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3879   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3880   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3881   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3882   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884 
3885   PetscFunctionReturn(0);
3886 }
3887 
3888 #undef __FUNCT__
3889 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3890 /*@
3891    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3892 
3893    Input Parameters:
3894 +  mat  - the matrix
3895 -  fact - the factor used to compute the size of the hash table
3896 
3897    Not Collective, each process can use a different factor
3898 
3899    Level: advanced
3900 
3901   Notes:
3902    This can also be set by the command line option: -mat_use_hash_table <fact>
3903 
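   Example (a minimal sketch; the factor 1.8 is only an illustration):
.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.8);CHKERRQ(ierr);
.ve
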
3904 .keywords: matrix, hashtable, factor, HT
3905 
3906 .seealso: MatSetOption()
3907 @*/
3908 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3909 {
3910   PetscErrorCode ierr;
3911 
3912   PetscFunctionBegin;
3913   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 
3917 EXTERN_C_BEGIN
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3920 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3921 {
3922   Mat_MPIBAIJ *baij;
3923 
3924   PetscFunctionBegin;
3925   baij = (Mat_MPIBAIJ*)mat->data;
3926   baij->ht_fact = fact;
3927   PetscFunctionReturn(0);
3928 }
3929 EXTERN_C_END
3930 
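/*
    MatMPIBAIJGetSeqBAIJ - returns the sequential diagonal block (Ad), the sequential
    off-diagonal block (Ao), and the array (colmap) that maps Ao's compressed block column
    numbering to global block column numbers.
*/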
3931 #undef __FUNCT__
3932 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3933 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3934 {
3935   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3936   PetscFunctionBegin;
3937   *Ad     = a->A;
3938   *Ao     = a->B;
3939   *colmap = a->garray;
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 /*
3944     Special version for direct calls from Fortran (to eliminate two function call overheads)
3945 */
3946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3947 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3949 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3950 #endif
3951 
3952 #undef __FUNCT__
3953 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3954 /*@C
3955   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3956 
3957   Collective on Mat
3958 
3959   Input Parameters:
3960 + mat - the matrix
3961 . min - number of input rows
3962 . im - input rows
3963 . nin - number of input columns
3964 . in - input columns
3965 . v - numerical values input
3966 - addvin - INSERT_VALUES or ADD_VALUES
3967 
3968   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3969 
3970   Level: advanced
3971 
3972 .seealso:   MatSetValuesBlocked()
3973 @*/
3974 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3975 {
3976   /* convert input arguments to C version */
3977   Mat             mat = *matin;
3978   PetscInt        m = *min, n = *nin;
3979   InsertMode      addv = *addvin;
3980 
3981   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3982   const MatScalar *value;
3983   MatScalar       *barray=baij->barray;
3984   PetscBool       roworiented = baij->roworiented;
3985   PetscErrorCode  ierr;
3986   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3987   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3988   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3989 
3990   PetscFunctionBegin;
3991   /* tasks normally handled by MatSetValuesBlocked() */
3992   if (mat->insertmode == NOT_SET_VALUES) {
3993     mat->insertmode = addv;
3994   }
3995 #if defined(PETSC_USE_DEBUG)
3996   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3997   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3998 #endif
3999   if (mat->assembled) {
4000     mat->was_assembled = PETSC_TRUE;
4001     mat->assembled     = PETSC_FALSE;
4002   }
4003   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4004 
4005 
4006   if(!barray) {
4007     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4008     baij->barray = barray;
4009   }
4010 
4011   if (roworiented) {
4012     stepval = (n-1)*bs;
4013   } else {
4014     stepval = (m-1)*bs;
4015   }
4016   for (i=0; i<m; i++) {
4017     if (im[i] < 0) continue;
4018 #if defined(PETSC_USE_DEBUG)
4019     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4020 #endif
4021     if (im[i] >= rstart && im[i] < rend) {
4022       row = im[i] - rstart;
4023       for (j=0; j<n; j++) {
4024         /* If NumCol = 1 then a copy is not required */
4025         if ((roworiented) && (n == 1)) {
4026           barray = (MatScalar*)v + i*bs2;
4027         } else if((!roworiented) && (m == 1)) {
4028           barray = (MatScalar*)v + j*bs2;
4029         } else { /* Here a copy is required */
4030           if (roworiented) {
4031             value = v + i*(stepval+bs)*bs + j*bs;
4032           } else {
4033             value = v + j*(stepval+bs)*bs + i*bs;
4034           }
4035           for (ii=0; ii<bs; ii++,value+=stepval) {
4036             for (jj=0; jj<bs; jj++) {
4037               *barray++  = *value++;
4038             }
4039           }
4040           barray -= bs2;
4041         }
4042 
4043         if (in[j] >= cstart && in[j] < cend){
4044           col  = in[j] - cstart;
4045           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4046         }
4047         else if (in[j] < 0) continue;
4048 #if defined(PETSC_USE_DEBUG)
4049         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4050 #endif
4051         else {
4052           if (mat->was_assembled) {
4053             if (!baij->colmap) {
4054               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4055             }
4056 
4057 #if defined(PETSC_USE_DEBUG)
4058 #if defined (PETSC_USE_CTABLE)
4059             { PetscInt data;
4060               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4061               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4062             }
4063 #else
4064             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4065 #endif
4066 #endif
4067 #if defined (PETSC_USE_CTABLE)
4068             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4069             col  = (col - 1)/bs;
4070 #else
4071             col = (baij->colmap[in[j]] - 1)/bs;
4072 #endif
4073             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4074               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4075               col =  in[j];
4076             }
4077           }
4078           else col = in[j];
4079           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4080         }
4081       }
4082     } else {
4083       if (!baij->donotstash) {
4084         if (roworiented) {
4085           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4086         } else {
4087           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4088         }
4089       }
4090     }
4091   }
4092 
4093   /* task normally handled by MatSetValuesBlocked() */
4094   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4095   PetscFunctionReturn(0);
4096 }
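
/*
   For reference, the standard C path that the direct Fortran call above bypasses is an
   ordinary call to MatSetValuesBlocked(); a minimal sketch with hypothetical values for a
   matrix with block size bs = 2 (vals holds one bs-by-bs block in row-oriented order, and
   row/col are block indices):

      PetscInt    row = 0,col = 1;
      PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
      ierr = MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/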
4097 
4098 #undef __FUNCT__
4099 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4100 /*@
4101      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
4102          rows in standard CSR format.
4103 
4104    Collective on MPI_Comm
4105 
4106    Input Parameters:
4107 +  comm - MPI communicator
4108 .  bs - the block size, only a block size of 1 is supported
4109 .  m - number of local rows (Cannot be PETSC_DECIDE)
4110 .  n - This value should be the same as the local size used in creating the
4111        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4112        calculated if N is given). For square matrices n is almost always m.
4113 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4114 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4115 .   i - row indices
4116 .   j - column indices
4117 -   a - matrix values
4118 
4119    Output Parameter:
4120 .   mat - the matrix
4121 
4122    Level: intermediate
4123 
4124    Notes:
4125        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4126      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4127      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4128 
4129        The i and j indices are 0 based; each entry of i is an offset into the local j (and a) arrays marking where the corresponding row begins.
4130 
4131 .keywords: matrix, baij, block, compressed row, sparse, parallel
4132 
4133 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4134           MATMPIBAIJ, MatCreateMPIBAIJ(), MatCreateMPIAIJWithSplitArrays()
4135 @*/
4136 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4137 {
4138   PetscErrorCode ierr;
4139 
4140 
4141   PetscFunctionBegin;
4142   if (i[0]) {
4143     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4144   }
4145   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4146   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4147   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4148   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4149   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4150   PetscFunctionReturn(0);
4151 }
4152
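/*
   Usage sketch for MatCreateMPIBAIJWithArrays() (a minimal, hypothetical example; each
   process passes the CSR arrays describing its own rows, only bs = 1 is supported, the two
   local rows here contain 2 and 1 nonzeros with global column indices in j, and the global
   column count N is given as 6 because n is left as PETSC_DECIDE):

      Mat         A;
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,5,3};
      PetscScalar a[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,6,i,j,a,&A);CHKERRQ(ierr);
*/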