xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4cb17eb5d24fda53f1a75322daaa24f590374b40)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non scalable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
109 
110 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
111 { \
112  \
113     brow = row/bs;  \
114     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
115     rmax = aimax[brow]; nrow = ailen[brow]; \
116       bcol = col/bs; \
117       ridx = row % bs; cidx = col % bs; \
118       low = 0; high = nrow; \
119       while (high-low > 3) { \
120         t = (low+high)/2; \
121         if (rp[t] > bcol) high = t; \
122         else              low  = t; \
123       } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > bcol) break; \
126         if (rp[_i] == bcol) { \
127           bap  = ap +  bs2*_i + bs*cidx + ridx; \
128           if (addv == ADD_VALUES) *bap += value;  \
129           else                    *bap  = value;  \
130           goto a_noinsert; \
131         } \
132       } \
133       if (a->nonew == 1) goto a_noinsert; \
134       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
136       N = nrow++ - 1;  \
137       /* shift up all the later entries in this row */ \
138       for (ii=N; ii>=_i; ii--) { \
139         rp[ii+1] = rp[ii]; \
140         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141       } \
142       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
143       rp[_i]                      = bcol;  \
144       ap[bs2*_i + bs*cidx + ridx] = value;  \
145       a_noinsert:; \
146     ailen[brow] = nrow; \
147 }
148 
149 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
150 { \
151     brow = row/bs;  \
152     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
153     rmax = bimax[brow]; nrow = bilen[brow]; \
154       bcol = col/bs; \
155       ridx = row % bs; cidx = col % bs; \
156       low = 0; high = nrow; \
157       while (high-low > 3) { \
158         t = (low+high)/2; \
159         if (rp[t] > bcol) high = t; \
160         else              low  = t; \
161       } \
162       for (_i=low; _i<high; _i++) { \
163         if (rp[_i] > bcol) break; \
164         if (rp[_i] == bcol) { \
165           bap  = ap +  bs2*_i + bs*cidx + ridx; \
166           if (addv == ADD_VALUES) *bap += value;  \
167           else                    *bap  = value;  \
168           goto b_noinsert; \
169         } \
170       } \
171       if (b->nonew == 1) goto b_noinsert; \
172       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
173       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
174       CHKMEMQ;\
175       N = nrow++ - 1;  \
176       /* shift up all the later entries in this row */ \
177       for (ii=N; ii>=_i; ii--) { \
178         rp[ii+1] = rp[ii]; \
179         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
180       } \
181       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
182       rp[_i]                      = bcol;  \
183       ap[bs2*_i + bs*cidx + ridx] = value;  \
184       b_noinsert:; \
185     bilen[brow] = nrow; \
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatSetValues_MPIBAIJ"
190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
191 {
192   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
193   MatScalar      value;
194   PetscTruth     roworiented = baij->roworiented;
195   PetscErrorCode ierr;
196   PetscInt       i,j,row,col;
197   PetscInt       rstart_orig=mat->rmap->rstart;
198   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
199   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
200 
201   /* Some Variables required in the macro */
202   Mat            A = baij->A;
203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
204   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   MatScalar      *aa=a->a;
206 
207   Mat            B = baij->B;
208   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
209   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   MatScalar      *ba=b->a;
211 
212   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
214   MatScalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   if (v) PetscValidScalarPointer(v,6);
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if(!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If NumCol = 1 then a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316 	    value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) {
320               barray[jj]  = value[jj];
321             }
322             barray += bs;
323           }
324           barray -= bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscTruth     roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408       row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
413         /* Look up PetscInto the Hash Table */
414         key = (row/bs)*Nbs+(col/bs)+1;
415         h1  = HASH(size,key,tmp);
416 
417 
418         idx = h1;
419 #if defined(PETSC_USE_DEBUG)
420         insert_ct++;
421         total_ct++;
422         if (HT[idx] != key) {
423           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
424           if (idx == size) {
425             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
426             if (idx == h1) {
427               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428             }
429           }
430         }
431 #else
432         if (HT[idx] != key) {
433           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
434           if (idx == size) {
435             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
436             if (idx == h1) {
437               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
438             }
439           }
440         }
441 #endif
442         /* A HASH table entry is found, so insert the values at the correct address */
443         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
444         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
445       }
446     } else {
447       if (!baij->donotstash) {
448         if (roworiented) {
449           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
450         } else {
451           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456 #if defined(PETSC_USE_DEBUG)
457   baij->ht_total_ct = total_ct;
458   baij->ht_insert_ct = insert_ct;
459 #endif
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscTruth        roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up into the Hash Table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507        if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data --;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622  PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatNorm_MPIBAIJ"
627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
628 {
629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
630   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
633   PetscReal      sum = 0.0;
634   MatScalar      *v;
635 
636   PetscFunctionBegin;
637   if (baij->size == 1) {
638     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
639   } else {
640     if (type == NORM_FROBENIUS) {
641       v = amat->a;
642       nz = amat->nz*bs2;
643       for (i=0; i<nz; i++) {
644 #if defined(PETSC_USE_COMPLEX)
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646 #else
647         sum += (*v)*(*v); v++;
648 #endif
649       }
650       v = bmat->a;
651       nz = bmat->nz*bs2;
652       for (i=0; i<nz; i++) {
653 #if defined(PETSC_USE_COMPLEX)
654         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
660       *nrm = sqrt(*nrm);
661     } else if (type == NORM_1) { /* max column sum */
662       PetscReal *tmp,*tmp2;
663       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
664       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
665       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
666       v = amat->a; jj = amat->j;
667       for (i=0; i<amat->nz; i++) {
668         for (j=0; j<bs; j++){
669           col = bs*(cstart + *jj) + j; /* column index */
670           for (row=0; row<bs; row++){
671             tmp[col] += PetscAbsScalar(*v);  v++;
672           }
673         }
674         jj++;
675       }
676       v = bmat->a; jj = bmat->j;
677       for (i=0; i<bmat->nz; i++) {
678         for (j=0; j<bs; j++){
679           col = bs*garray[*jj] + j;
680           for (row=0; row<bs; row++){
681             tmp[col] += PetscAbsScalar(*v); v++;
682           }
683         }
684         jj++;
685       }
686       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
687       *nrm = 0.0;
688       for (j=0; j<mat->cmap->N; j++) {
689         if (tmp2[j] > *nrm) *nrm = tmp2[j];
690       }
691       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
692     } else if (type == NORM_INFINITY) { /* max row sum */
693       PetscReal *sums;
694       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
695       sum = 0.0;
696       for (j=0; j<amat->mbs; j++) {
697         for (row=0; row<bs; row++) sums[row] = 0.0;
698         v = amat->a + bs2*amat->i[j];
699         nz = amat->i[j+1]-amat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         v = bmat->a + bs2*bmat->i[j];
708         nz = bmat->i[j+1]-bmat->i[j];
709         for (i=0; i<nz; i++) {
710           for (col=0; col<bs; col++){
711             for (row=0; row<bs; row++){
712               sums[row] += PetscAbsScalar(*v); v++;
713             }
714           }
715         }
716         for (row=0; row<bs; row++){
717           if (sums[row] > sum) sum = sums[row];
718         }
719       }
720       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
721       ierr = PetscFree(sums);CHKERRQ(ierr);
722     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates the hash table, and sets the table
729   This table is created only once.
730   If new entried need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash || mat->nooffprocentries) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processors are either in INSERTMODE or ADDMODE */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
840   mat->insertmode = addv; /* in case this processor had no cache */
841 
842   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
843   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
854 {
855   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
856   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
857   PetscErrorCode ierr;
858   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
859   PetscInt       *row,*col;
860   PetscTruth     r1,r2,r3,other_disassembled;
861   MatScalar      *val;
862   InsertMode     addv = mat->insertmode;
863   PetscMPIInt    n;
864 
865   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
866   PetscFunctionBegin;
867   if (!baij->donotstash && !mat->nooffprocentries) {
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         /* Now assemble all these values with a single function call */
878         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
879         i = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
883     /* Now process the block-stash. Since the values are stashed column-oriented,
884        set the roworiented flag to column oriented, and after MatSetValues()
885        restore the original flags */
886     r1 = baij->roworiented;
887     r2 = a->roworiented;
888     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
889     baij->roworiented = PETSC_FALSE;
890     a->roworiented    = PETSC_FALSE;
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
899         if (j < n) ncols = j-i;
900         else       ncols = n-i;
901         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
902         i = j;
903       }
904     }
905     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
906     baij->roworiented = r1;
907     a->roworiented    = r2;
908     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
909   }
910 
911   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
913 
914   /* determine if any processor has disassembled, if so we must
915      also disassemble ourselfs, in order that we may reassemble. */
916   /*
917      if nonzero structure of submatrix B cannot change then we know that
918      no processor disassembled thus we can skip this stuff
919   */
920   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
921     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
922     if (mat->was_assembled && !other_disassembled) {
923       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
924     }
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
929   }
930   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
931   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
933 
934 #if defined(PETSC_USE_INFO)
935   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
936     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
948   baij->rowvalues = 0;
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscTruth        iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
975               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
976       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
977       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
978       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
979       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
980       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
981       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
982       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
983       PetscFunctionReturn(0);
984     } else if (format == PETSC_VIEWER_ASCII_INFO) {
985       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
986       PetscFunctionReturn(0);
987     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
988       PetscFunctionReturn(0);
989     }
990   }
991 
992   if (isdraw) {
993     PetscDraw       draw;
994     PetscTruth isnull;
995     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
996     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
997   }
998 
999   if (size == 1) {
1000     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1001     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1002   } else {
1003     /* assemble the entire matrix onto first processor. */
1004     Mat         A;
1005     Mat_SeqBAIJ *Aloc;
1006     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1007     MatScalar   *a;
1008 
1009     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1010     /* Perhaps this should be the type of mat? */
1011     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1012     if (!rank) {
1013       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1014     } else {
1015       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1016     }
1017     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1018     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1019     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1020 
1021     /* copy over the A part */
1022     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1023     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1025 
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = (baij->cstartbs+aj[j])*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     /* copy over the B part */
1038     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1039     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1040     for (i=0; i<mbs; i++) {
1041       rvals[0] = bs*(baij->rstartbs + i);
1042       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1043       for (j=ai[i]; j<ai[i+1]; j++) {
1044         col = baij->garray[aj[j]]*bs;
1045         for (k=0; k<bs; k++) {
1046           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1047           col++; a += bs;
1048         }
1049       }
1050     }
1051     ierr = PetscFree(rvals);CHKERRQ(ierr);
1052     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1054     /*
1055        Everyone has to call to draw the matrix since the graphics waits are
1056        synchronized across all processors that share the PetscDraw object
1057     */
1058     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1059     if (!rank) {
1060       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1061       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1062     }
1063     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1064     ierr = MatDestroy(A);CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatView_MPIBAIJ"
1071 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1072 {
1073   PetscErrorCode ierr;
1074   PetscTruth     iascii,isdraw,issocket,isbinary;
1075 
1076   PetscFunctionBegin;
1077   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1078   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1079   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1080   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1081   if (iascii || isdraw || issocket || isbinary) {
1082     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1083   } else {
1084     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1091 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1092 {
1093   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097 #if defined(PETSC_USE_LOG)
1098   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1099 #endif
1100   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1101   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1102   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1103   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1104 #if defined (PETSC_USE_CTABLE)
1105   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1106 #else
1107   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1108 #endif
1109   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1110   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1111   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1112   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1113   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1114   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1115   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1116   ierr = PetscFree(baij);CHKERRQ(ierr);
1117 
1118   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMult_MPIBAIJ"
1131 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1132 {
1133   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   PetscInt       nt;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1139   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1140   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1141   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1142   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1144   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1151 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1152 {
1153   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1158   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1166 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1167 {
1168   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1169   PetscErrorCode ierr;
1170   PetscTruth     merged;
1171 
1172   PetscFunctionBegin;
1173   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1174   /* do nondiagonal part */
1175   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1176   if (!merged) {
1177     /* send it on its way */
1178     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     /* do local part */
1180     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1181     /* receive remote parts: note this assumes the values are not actually */
1182     /* inserted in yy until the next line */
1183     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1184   } else {
1185     /* do local part */
1186     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1187     /* send it on its way */
1188     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1189     /* values actually were received in the Begin() but we need to call this nop */
1190     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1197 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1198 {
1199   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   /* do nondiagonal part */
1204   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1205   /* send it on its way */
1206   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1207   /* do local part */
1208   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1209   /* receive remote parts: note this assumes the values are not actually */
1210   /* inserted in yy until the next line, which is true for my implementation*/
1211   /* but is not perhaps always true. */
1212   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*
1217   This only works correctly for square matrices where the subblock A->A is the
1218    diagonal block
1219 */
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1222 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1223 {
1224   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1229   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatScale_MPIBAIJ"
1235 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1236 {
1237   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1242   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1248 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1249 {
1250   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1251   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1252   PetscErrorCode ierr;
1253   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1254   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1255   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1256 
1257   PetscFunctionBegin;
1258   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1259   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1260   mat->getrowactive = PETSC_TRUE;
1261 
1262   if (!mat->rowvalues && (idx || v)) {
1263     /*
1264         allocate enough space to hold information from the longest row.
1265     */
1266     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1267     PetscInt     max = 1,mbs = mat->mbs,tmp;
1268     for (i=0; i<mbs; i++) {
1269       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1270       if (max < tmp) { max = tmp; }
1271     }
1272     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1273   }
1274   lrow = row - brstart;
1275 
1276   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1277   if (!v)   {pvA = 0; pvB = 0;}
1278   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1279   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1280   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1281   nztot = nzA + nzB;
1282 
1283   cmap  = mat->garray;
1284   if (v  || idx) {
1285     if (nztot) {
1286       /* Sort by increasing column numbers, assuming A and B already sorted */
1287       PetscInt imark = -1;
1288       if (v) {
1289         *v = v_p = mat->rowvalues;
1290         for (i=0; i<nzB; i++) {
1291           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1292           else break;
1293         }
1294         imark = i;
1295         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1296         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1297       }
1298       if (idx) {
1299         *idx = idx_p = mat->rowindices;
1300         if (imark > -1) {
1301           for (i=0; i<imark; i++) {
1302             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1303           }
1304         } else {
1305           for (i=0; i<nzB; i++) {
1306             if (cmap[cworkB[i]/bs] < cstart)
1307               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1308             else break;
1309           }
1310           imark = i;
1311         }
1312         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1313         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1314       }
1315     } else {
1316       if (idx) *idx = 0;
1317       if (v)   *v   = 0;
1318     }
1319   }
1320   *nz = nztot;
1321   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1322   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1328 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1331 
1332   PetscFunctionBegin;
1333   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1334   baij->getrowactive = PETSC_FALSE;
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1340 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1341 {
1342   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1347   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1353 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1354 {
1355   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1356   Mat            A = a->A,B = a->B;
1357   PetscErrorCode ierr;
1358   PetscReal      isend[5],irecv[5];
1359 
1360   PetscFunctionBegin;
1361   info->block_size     = (PetscReal)matin->rmap->bs;
1362   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1363   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1364   isend[3] = info->memory;  isend[4] = info->mallocs;
1365   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1366   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1367   isend[3] += info->memory;  isend[4] += info->mallocs;
1368   if (flag == MAT_LOCAL) {
1369     info->nz_used      = isend[0];
1370     info->nz_allocated = isend[1];
1371     info->nz_unneeded  = isend[2];
1372     info->memory       = isend[3];
1373     info->mallocs      = isend[4];
1374   } else if (flag == MAT_GLOBAL_MAX) {
1375     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1376     info->nz_used      = irecv[0];
1377     info->nz_allocated = irecv[1];
1378     info->nz_unneeded  = irecv[2];
1379     info->memory       = irecv[3];
1380     info->mallocs      = irecv[4];
1381   } else if (flag == MAT_GLOBAL_SUM) {
1382     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1383     info->nz_used      = irecv[0];
1384     info->nz_allocated = irecv[1];
1385     info->nz_unneeded  = irecv[2];
1386     info->memory       = irecv[3];
1387     info->mallocs      = irecv[4];
1388   } else {
1389     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1390   }
1391   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1392   info->fill_ratio_needed = 0;
1393   info->factor_mallocs    = 0;
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1399 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1400 {
1401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   switch (op) {
1406   case MAT_NEW_NONZERO_LOCATIONS:
1407   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1408   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1409   case MAT_KEEP_NONZERO_PATTERN:
1410   case MAT_NEW_NONZERO_LOCATION_ERR:
1411     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1412     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1413     break;
1414   case MAT_ROW_ORIENTED:
1415     a->roworiented = flg;
1416     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1417     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1418     break;
1419   case MAT_NEW_DIAGONALS:
1420     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1421     break;
1422   case MAT_IGNORE_OFF_PROC_ENTRIES:
1423     a->donotstash = flg;
1424     break;
1425   case MAT_USE_HASH_TABLE:
1426     a->ht_flag = flg;
1427     break;
1428   case MAT_SYMMETRIC:
1429   case MAT_STRUCTURALLY_SYMMETRIC:
1430   case MAT_HERMITIAN:
1431   case MAT_SYMMETRY_ETERNAL:
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     break;
1434   default:
1435     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1442 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1443 {
1444   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1445   Mat_SeqBAIJ    *Aloc;
1446   Mat            B;
1447   PetscErrorCode ierr;
1448   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1449   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1450   MatScalar      *a;
1451 
1452   PetscFunctionBegin;
1453   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1454   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1455     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1456     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1457     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1458     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1459   } else {
1460     B = *matout;
1461   }
1462 
1463   /* copy over the A part */
1464   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1465   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1466   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1467 
1468   for (i=0; i<mbs; i++) {
1469     rvals[0] = bs*(baij->rstartbs + i);
1470     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1471     for (j=ai[i]; j<ai[i+1]; j++) {
1472       col = (baij->cstartbs+aj[j])*bs;
1473       for (k=0; k<bs; k++) {
1474         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1475         col++; a += bs;
1476       }
1477     }
1478   }
1479   /* copy over the B part */
1480   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1481   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1482   for (i=0; i<mbs; i++) {
1483     rvals[0] = bs*(baij->rstartbs + i);
1484     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1485     for (j=ai[i]; j<ai[i+1]; j++) {
1486       col = baij->garray[aj[j]]*bs;
1487       for (k=0; k<bs; k++) {
1488         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1489         col++; a += bs;
1490       }
1491     }
1492   }
1493   ierr = PetscFree(rvals);CHKERRQ(ierr);
1494   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1496 
1497   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1498     *matout = B;
1499   } else {
1500     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1501   }
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1507 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1508 {
1509   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1510   Mat            a = baij->A,b = baij->B;
1511   PetscErrorCode ierr;
1512   PetscInt       s1,s2,s3;
1513 
1514   PetscFunctionBegin;
1515   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1516   if (rr) {
1517     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1518     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1519     /* Overlap communication with computation. */
1520     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521   }
1522   if (ll) {
1523     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1524     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1525     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1526   }
1527   /* scale  the diagonal block */
1528   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1529 
1530   if (rr) {
1531     /* Do a scatter end and then right scale the off-diagonal block */
1532     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1533     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1534   }
1535 
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1541 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1542 {
1543   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1544   PetscErrorCode ierr;
1545   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1546   PetscInt       i,*owners = A->rmap->range;
1547   PetscInt       *nprocs,j,idx,nsends,row;
1548   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1549   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1550   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1551   MPI_Comm       comm = ((PetscObject)A)->comm;
1552   MPI_Request    *send_waits,*recv_waits;
1553   MPI_Status     recv_status,*send_status;
1554 #if defined(PETSC_DEBUG)
1555   PetscTruth     found = PETSC_FALSE;
1556 #endif
1557 
1558   PetscFunctionBegin;
1559   /*  first count number of contributors to each processor */
1560   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1561   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1562   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1563   j = 0;
1564   for (i=0; i<N; i++) {
1565     if (lastidx > (idx = rows[i])) j = 0;
1566     lastidx = idx;
1567     for (; j<size; j++) {
1568       if (idx >= owners[j] && idx < owners[j+1]) {
1569         nprocs[2*j]++;
1570         nprocs[2*j+1] = 1;
1571         owner[i] = j;
1572 #if defined(PETSC_DEBUG)
1573         found = PETSC_TRUE;
1574 #endif
1575         break;
1576       }
1577     }
1578 #if defined(PETSC_DEBUG)
1579     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1580     found = PETSC_FALSE;
1581 #endif
1582   }
1583   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1584 
1585   /* inform other processors of number of messages and max length*/
1586   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1587 
1588   /* post receives:   */
1589   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1590   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1591   for (i=0; i<nrecvs; i++) {
1592     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1593   }
1594 
1595   /* do sends:
1596      1) starts[i] gives the starting index in svalues for stuff going to
1597      the ith processor
1598   */
1599   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1600   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1601   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1602   starts[0]  = 0;
1603   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1604   for (i=0; i<N; i++) {
1605     svalues[starts[owner[i]]++] = rows[i];
1606   }
1607 
1608   starts[0] = 0;
1609   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1610   count = 0;
1611   for (i=0; i<size; i++) {
1612     if (nprocs[2*i+1]) {
1613       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1614     }
1615   }
1616   ierr = PetscFree(starts);CHKERRQ(ierr);
1617 
1618   base = owners[rank];
1619 
1620   /*  wait on receives */
1621   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1622   count  = nrecvs;
1623   slen = 0;
1624   while (count) {
1625     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1626     /* unpack receives into our local space */
1627     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1628     source[imdex]  = recv_status.MPI_SOURCE;
1629     lens[imdex]    = n;
1630     slen          += n;
1631     count--;
1632   }
1633   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1634 
1635   /* move the data into the send scatter */
1636   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1637   count = 0;
1638   for (i=0; i<nrecvs; i++) {
1639     values = rvalues + i*nmax;
1640     for (j=0; j<lens[i]; j++) {
1641       lrows[count++] = values[j] - base;
1642     }
1643   }
1644   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1645   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1646   ierr = PetscFree(owner);CHKERRQ(ierr);
1647   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1648 
1649   /* actually zap the local rows */
1650   /*
1651         Zero the required rows. If the "diagonal block" of the matrix
1652      is square and the user wishes to set the diagonal we use separate
1653      code so that MatSetValues() is not called for each diagonal allocating
1654      new memory, thus calling lots of mallocs and slowing things down.
1655 
1656   */
1657   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1658   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1659   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1660     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1661   } else if (diag != 0.0) {
1662     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1663     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1664        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1665     for (i=0; i<slen; i++) {
1666       row  = lrows[i] + rstart_bs;
1667       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1668     }
1669     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   } else {
1672     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1673   }
1674 
1675   ierr = PetscFree(lrows);CHKERRQ(ierr);
1676 
1677   /* wait on sends */
1678   if (nsends) {
1679     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1680     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1681     ierr = PetscFree(send_status);CHKERRQ(ierr);
1682   }
1683   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1684   ierr = PetscFree(svalues);CHKERRQ(ierr);
1685 
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1691 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1692 {
1693   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatEqual_MPIBAIJ"
1705 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1706 {
1707   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1708   Mat            a,b,c,d;
1709   PetscTruth     flg;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   a = matA->A; b = matA->B;
1714   c = matB->A; d = matB->B;
1715 
1716   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1717   if (flg) {
1718     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1719   }
1720   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatCopy_MPIBAIJ"
1726 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1727 {
1728   PetscErrorCode ierr;
1729   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1730   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1731 
1732   PetscFunctionBegin;
1733   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1734   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1735     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1736   } else {
1737     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1738     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1739   }
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1745 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1746 {
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1756 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1757 {
1758   PetscErrorCode ierr;
1759   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1760   PetscBLASInt   bnz,one=1;
1761   Mat_SeqBAIJ    *x,*y;
1762 
1763   PetscFunctionBegin;
1764   if (str == SAME_NONZERO_PATTERN) {
1765     PetscScalar alpha = a;
1766     x = (Mat_SeqBAIJ *)xx->A->data;
1767     y = (Mat_SeqBAIJ *)yy->A->data;
1768     bnz = PetscBLASIntCast(x->nz);
1769     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1770     x = (Mat_SeqBAIJ *)xx->B->data;
1771     y = (Mat_SeqBAIJ *)yy->B->data;
1772     bnz = PetscBLASIntCast(x->nz);
1773     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1774   } else {
1775     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1776   }
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1782 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1783 {
1784   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1785   PetscInt rbs,cbs;
1786   PetscErrorCode ierr;
1787 
1788   PetscFunctionBegin;
1789   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1790   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1791   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1792   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1793   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1794   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 #undef __FUNCT__
1799 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1800 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1801 {
1802   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1803   PetscErrorCode ierr;
1804 
1805   PetscFunctionBegin;
1806   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1807   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1813 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1814 {
1815   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1816   PetscErrorCode ierr;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1820   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1826 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1827 {
1828   PetscErrorCode ierr;
1829   IS             iscol_local;
1830   PetscInt       csize;
1831 
1832   PetscFunctionBegin;
1833   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1834   if (call == MAT_REUSE_MATRIX) {
1835     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1836     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1837   } else {
1838     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1839   }
1840   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1841   if (call == MAT_INITIAL_MATRIX) {
1842     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1843     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1850 /*
1851     Not great since it makes two copies of the submatrix, first an SeqBAIJ
1852   in local and then by concatenating the local matrices the end result.
1853   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1854 */
1855 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1856 {
1857   PetscErrorCode ierr;
1858   PetscMPIInt    rank,size;
1859   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1860   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1861   Mat            *local,M,Mreuse;
1862   MatScalar      *vwork,*aa;
1863   MPI_Comm       comm = ((PetscObject)mat)->comm;
1864   Mat_SeqBAIJ    *aij;
1865 
1866 
1867   PetscFunctionBegin;
1868   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1869   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1870 
1871   if (call ==  MAT_REUSE_MATRIX) {
1872     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1873     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1874     local = &Mreuse;
1875     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1876   } else {
1877     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1878     Mreuse = *local;
1879     ierr   = PetscFree(local);CHKERRQ(ierr);
1880   }
1881 
1882   /*
1883       m - number of local rows
1884       n - number of columns (same on all processors)
1885       rstart - first row in new global matrix generated
1886   */
1887   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1888   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1889   m    = m/bs;
1890   n    = n/bs;
1891 
1892   if (call == MAT_INITIAL_MATRIX) {
1893     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1894     ii  = aij->i;
1895     jj  = aij->j;
1896 
1897     /*
1898         Determine the number of non-zeros in the diagonal and off-diagonal
1899         portions of the matrix in order to do correct preallocation
1900     */
1901 
1902     /* first get start and end of "diagonal" columns */
1903     if (csize == PETSC_DECIDE) {
1904       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1905       if (mglobal == n*bs) { /* square matrix */
1906 	nlocal = m;
1907       } else {
1908         nlocal = n/size + ((n % size) > rank);
1909       }
1910     } else {
1911       nlocal = csize/bs;
1912     }
1913     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1914     rstart = rend - nlocal;
1915     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1916 
1917     /* next, compute all the lengths */
1918     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1919     olens = dlens + m;
1920     for (i=0; i<m; i++) {
1921       jend = ii[i+1] - ii[i];
1922       olen = 0;
1923       dlen = 0;
1924       for (j=0; j<jend; j++) {
1925         if (*jj < rstart || *jj >= rend) olen++;
1926         else dlen++;
1927         jj++;
1928       }
1929       olens[i] = olen;
1930       dlens[i] = dlen;
1931     }
1932     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1933     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1934     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1935     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1936     ierr = PetscFree(dlens);CHKERRQ(ierr);
1937   } else {
1938     PetscInt ml,nl;
1939 
1940     M = *newmat;
1941     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1942     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1943     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1944     /*
1945          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1946        rather than the slower MatSetValues().
1947     */
1948     M->was_assembled = PETSC_TRUE;
1949     M->assembled     = PETSC_FALSE;
1950   }
1951   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1952   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1953   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1954   ii  = aij->i;
1955   jj  = aij->j;
1956   aa  = aij->a;
1957   for (i=0; i<m; i++) {
1958     row   = rstart/bs + i;
1959     nz    = ii[i+1] - ii[i];
1960     cwork = jj;     jj += nz;
1961     vwork = aa;     aa += nz;
1962     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1963   }
1964 
1965   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1966   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   *newmat = M;
1968 
1969   /* save submatrix used in processor for next request */
1970   if (call ==  MAT_INITIAL_MATRIX) {
1971     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1972     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1973   }
1974 
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatPermute_MPIBAIJ"
1980 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1981 {
1982   MPI_Comm       comm,pcomm;
1983   PetscInt       first,local_size,nrows;
1984   const PetscInt *rows;
1985   PetscMPIInt    size;
1986   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1987   PetscErrorCode ierr;
1988 
1989   PetscFunctionBegin;
1990   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1991   /* make a collective version of 'rowp' */
1992   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1993   if (pcomm==comm) {
1994     crowp = rowp;
1995   } else {
1996     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1997     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1998     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1999     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2000   }
2001   /* collect the global row permutation and invert it */
2002   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2003   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2004   if (pcomm!=comm) {
2005     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2006   }
2007   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2008   /* get the local target indices */
2009   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2010   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2011   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2012   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2013   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2014   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2015   /* the column permutation is so much easier;
2016      make a local version of 'colp' and invert it */
2017   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2018   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2019   if (size==1) {
2020     lcolp = colp;
2021   } else {
2022     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2023     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2024     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2025   }
2026   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2027   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2028   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2029   if (size>1) {
2030     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2031     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2032   }
2033   /* now we just get the submatrix */
2034   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2035   /* clean up */
2036   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2037   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2043 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2044 {
2045   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2046   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2047 
2048   PetscFunctionBegin;
2049   if (nghosts) { *nghosts = B->nbs;}
2050   if (ghosts) {*ghosts = baij->garray;}
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2058 /*
2059     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2060 */
2061 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2062 {
2063   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2064   PetscErrorCode        ierr;
2065   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2066   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2067   const PetscInt        *is;
2068   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2069   PetscInt              *rowhit,M,cstart,cend,colb;
2070   PetscInt              *columnsforrow,l;
2071   IS                    *isa;
2072   PetscTruth             done,flg;
2073   ISLocalToGlobalMapping map = mat->bmapping;
2074   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2075 
2076   PetscFunctionBegin;
2077   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2078   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2079 
2080   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2081   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2082   M                = mat->rmap->n/bs;
2083   cstart           = mat->cmap->rstart/bs;
2084   cend             = mat->cmap->rend/bs;
2085   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2086   c->N             = mat->cmap->N/bs;
2087   c->m             = mat->rmap->n/bs;
2088   c->rstart        = mat->rmap->rstart/bs;
2089 
2090   c->ncolors       = nis;
2091   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2092   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2093   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2094   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2095   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2096   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2097 
2098   /* Allow access to data structures of local part of matrix */
2099   if (!baij->colmap) {
2100     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2101   }
2102   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2103   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2104 
2105   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2106   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2107 
2108   for (i=0; i<nis; i++) {
2109     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2110     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2111     c->ncolumns[i] = n;
2112     if (n) {
2113       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2114       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2115       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2116     } else {
2117       c->columns[i]  = 0;
2118     }
2119 
2120     if (ctype == IS_COLORING_GLOBAL){
2121       /* Determine the total (parallel) number of columns of this color */
2122       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2123       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2124 
2125       nn   = PetscMPIIntCast(n);
2126       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2127       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2128       if (!nctot) {
2129         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2130       }
2131 
2132       disp[0] = 0;
2133       for (j=1; j<size; j++) {
2134         disp[j] = disp[j-1] + ncolsonproc[j-1];
2135       }
2136 
2137       /* Get complete list of columns for color on each processor */
2138       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2139       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2140       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2141     } else if (ctype == IS_COLORING_GHOSTED){
2142       /* Determine local number of columns of this color on this process, including ghost points */
2143       nctot = n;
2144       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2145       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2146     } else {
2147       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2148     }
2149 
2150     /*
2151        Mark all rows affect by these columns
2152     */
2153     /* Temporary option to allow for debugging/testing */
2154     flg  = PETSC_FALSE;
2155     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2156     if (!flg) {/*-----------------------------------------------------------------------------*/
2157       /* crude, fast version */
2158       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2159       /* loop over columns*/
2160       for (j=0; j<nctot; j++) {
2161         if (ctype == IS_COLORING_GHOSTED) {
2162           col = ltog[cols[j]];
2163         } else {
2164           col  = cols[j];
2165         }
2166         if (col >= cstart && col < cend) {
2167           /* column is in diagonal block of matrix */
2168           rows = A_cj + A_ci[col-cstart];
2169           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2170         } else {
2171 #if defined (PETSC_USE_CTABLE)
2172           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2173 	  colb --;
2174 #else
2175           colb = baij->colmap[col] - 1;
2176 #endif
2177           if (colb == -1) {
2178             m = 0;
2179           } else {
2180             colb = colb/bs;
2181             rows = B_cj + B_ci[colb];
2182             m    = B_ci[colb+1] - B_ci[colb];
2183           }
2184         }
2185         /* loop over columns marking them in rowhit */
2186         for (k=0; k<m; k++) {
2187           rowhit[*rows++] = col + 1;
2188         }
2189       }
2190 
2191       /* count the number of hits */
2192       nrows = 0;
2193       for (j=0; j<M; j++) {
2194         if (rowhit[j]) nrows++;
2195       }
2196       c->nrows[i]         = nrows;
2197       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2198       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2199       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2200       nrows = 0;
2201       for (j=0; j<M; j++) {
2202         if (rowhit[j]) {
2203           c->rows[i][nrows]           = j;
2204           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2205           nrows++;
2206         }
2207       }
2208     } else {/*-------------------------------------------------------------------------------*/
2209       /* slow version, using rowhit as a linked list */
2210       PetscInt currentcol,fm,mfm;
2211       rowhit[M] = M;
2212       nrows     = 0;
2213       /* loop over columns*/
2214       for (j=0; j<nctot; j++) {
2215         if (ctype == IS_COLORING_GHOSTED) {
2216           col = ltog[cols[j]];
2217         } else {
2218           col  = cols[j];
2219         }
2220         if (col >= cstart && col < cend) {
2221           /* column is in diagonal block of matrix */
2222           rows = A_cj + A_ci[col-cstart];
2223           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2224         } else {
2225 #if defined (PETSC_USE_CTABLE)
2226           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2227           colb --;
2228 #else
2229           colb = baij->colmap[col] - 1;
2230 #endif
2231           if (colb == -1) {
2232             m = 0;
2233           } else {
2234             colb = colb/bs;
2235             rows = B_cj + B_ci[colb];
2236             m    = B_ci[colb+1] - B_ci[colb];
2237           }
2238         }
2239 
2240         /* loop over columns marking them in rowhit */
2241         fm    = M; /* fm points to first entry in linked list */
2242         for (k=0; k<m; k++) {
2243           currentcol = *rows++;
2244 	  /* is it already in the list? */
2245           do {
2246             mfm  = fm;
2247             fm   = rowhit[fm];
2248           } while (fm < currentcol);
2249           /* not in list so add it */
2250           if (fm != currentcol) {
2251             nrows++;
2252             columnsforrow[currentcol] = col;
2253             /* next three lines insert new entry into linked list */
2254             rowhit[mfm]               = currentcol;
2255             rowhit[currentcol]        = fm;
2256             fm                        = currentcol;
2257             /* fm points to present position in list since we know the columns are sorted */
2258           } else {
2259             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2260           }
2261         }
2262       }
2263       c->nrows[i]         = nrows;
2264       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2265       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2266       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2267       /* now store the linked list of rows into c->rows[i] */
2268       nrows = 0;
2269       fm    = rowhit[M];
2270       do {
2271         c->rows[i][nrows]            = fm;
2272         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2273         fm                           = rowhit[fm];
2274       } while (fm < M);
2275     } /* ---------------------------------------------------------------------------------------*/
2276     ierr = PetscFree(cols);CHKERRQ(ierr);
2277   }
2278 
2279   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2280   /*
2281        vscale will contain the "diagonal" on processor scalings followed by the off processor
2282   */
2283   if (ctype == IS_COLORING_GLOBAL) {
2284     PetscInt *garray;
2285     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2286     for (i=0; i<baij->B->cmap->n/bs; i++) {
2287       for (j=0; j<bs; j++) {
2288         garray[i*bs+j] = bs*baij->garray[i]+j;
2289       }
2290     }
2291     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2292     ierr = PetscFree(garray);CHKERRQ(ierr);
2293     CHKMEMQ;
2294     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2295     for (k=0; k<c->ncolors; k++) {
2296       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2297       for (l=0; l<c->nrows[k]; l++) {
2298         col = c->columnsforrow[k][l];
2299         if (col >= cstart && col < cend) {
2300           /* column is in diagonal block of matrix */
2301           colb = col - cstart;
2302         } else {
2303           /* column  is in "off-processor" part */
2304 #if defined (PETSC_USE_CTABLE)
2305           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2306           colb --;
2307 #else
2308           colb = baij->colmap[col] - 1;
2309 #endif
2310           colb = colb/bs;
2311           colb += cend - cstart;
2312         }
2313         c->vscaleforrow[k][l] = colb;
2314       }
2315     }
2316   } else if (ctype == IS_COLORING_GHOSTED) {
2317     /* Get gtol mapping */
2318     PetscInt N = mat->cmap->N, *gtol;
2319     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2320     for (i=0; i<N; i++) gtol[i] = -1;
2321     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2322 
2323     c->vscale = 0; /* will be created in MatFDColoringApply() */
2324     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2325     for (k=0; k<c->ncolors; k++) {
2326       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2327       for (l=0; l<c->nrows[k]; l++) {
2328         col = c->columnsforrow[k][l];      /* global column index */
2329         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2330       }
2331     }
2332     ierr = PetscFree(gtol);CHKERRQ(ierr);
2333   }
2334   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2335 
2336   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2337   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2338   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2339   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2340     CHKMEMQ;
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2346 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2347 {
2348   Mat            B;
2349   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2350   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2351   Mat_SeqAIJ     *b;
2352   PetscErrorCode ierr;
2353   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2354   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2355   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2356 
2357   PetscFunctionBegin;
2358   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2359   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2360 
2361   /* ----------------------------------------------------------------
2362      Tell every processor the number of nonzeros per row
2363   */
2364   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2365   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2366     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2367   }
2368   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2369   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2370   displs     = recvcounts + size;
2371   for (i=0; i<size; i++) {
2372     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2373     displs[i]     = A->rmap->range[i]/bs;
2374   }
2375 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2376   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2377 #else
2378   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2379 #endif
2380   /* ---------------------------------------------------------------
2381      Create the sequential matrix of the same type as the local block diagonal
2382   */
2383   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2384   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2385   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2386   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2387   b = (Mat_SeqAIJ *)B->data;
2388 
2389   /*--------------------------------------------------------------------
2390     Copy my part of matrix column indices over
2391   */
2392   sendcount  = ad->nz + bd->nz;
2393   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2394   a_jsendbuf = ad->j;
2395   b_jsendbuf = bd->j;
2396   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2397   cnt        = 0;
2398   for (i=0; i<n; i++) {
2399 
2400     /* put in lower diagonal portion */
2401     m = bd->i[i+1] - bd->i[i];
2402     while (m > 0) {
2403       /* is it above diagonal (in bd (compressed) numbering) */
2404       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2405       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2406       m--;
2407     }
2408 
2409     /* put in diagonal portion */
2410     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2411       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2412     }
2413 
2414     /* put in upper diagonal portion */
2415     while (m-- > 0) {
2416       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2417     }
2418   }
2419   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2420 
2421   /*--------------------------------------------------------------------
2422     Gather all column indices to all processors
2423   */
2424   for (i=0; i<size; i++) {
2425     recvcounts[i] = 0;
2426     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2427       recvcounts[i] += lens[j];
2428     }
2429   }
2430   displs[0]  = 0;
2431   for (i=1; i<size; i++) {
2432     displs[i] = displs[i-1] + recvcounts[i-1];
2433   }
2434 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2435   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2436 #else
2437   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2438 #endif
2439   /*--------------------------------------------------------------------
2440     Assemble the matrix into useable form (note numerical values not yet set)
2441   */
2442   /* set the b->ilen (length of each row) values */
2443   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2444   /* set the b->i indices */
2445   b->i[0] = 0;
2446   for (i=1; i<=A->rmap->N/bs; i++) {
2447     b->i[i] = b->i[i-1] + lens[i-1];
2448   }
2449   ierr = PetscFree(lens);CHKERRQ(ierr);
2450   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2452   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2453 
2454   if (A->symmetric){
2455     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2456   } else if (A->hermitian) {
2457     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2458   } else if (A->structurally_symmetric) {
2459     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2460   }
2461   *newmat = B;
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSOR_MPIBAIJ"
2467 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2468 {
2469   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2470   PetscErrorCode ierr;
2471   Vec            bb1 = 0;
2472 
2473   PetscFunctionBegin;
2474   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2475     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2476   }
2477 
2478   if (flag == SOR_APPLY_UPPER) {
2479     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2480     PetscFunctionReturn(0);
2481   }
2482 
2483   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2484     if (flag & SOR_ZERO_INITIAL_GUESS) {
2485       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2486       its--;
2487     }
2488 
2489     while (its--) {
2490       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2491       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2492 
2493       /* update rhs: bb1 = bb - B*x */
2494       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2495       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2496 
2497       /* local sweep */
2498       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2499     }
2500   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2501     if (flag & SOR_ZERO_INITIAL_GUESS) {
2502       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2503       its--;
2504     }
2505     while (its--) {
2506       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2508 
2509       /* update rhs: bb1 = bb - B*x */
2510       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2511       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2512 
2513       /* local sweep */
2514       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2515     }
2516   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2517     if (flag & SOR_ZERO_INITIAL_GUESS) {
2518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2519       its--;
2520     }
2521     while (its--) {
2522       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2524 
2525       /* update rhs: bb1 = bb - B*x */
2526       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2527       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2528 
2529       /* local sweep */
2530       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2531     }
2532   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2533 
2534   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2539 
2540 
2541 /* -------------------------------------------------------------------*/
2542 static struct _MatOps MatOps_Values = {
2543        MatSetValues_MPIBAIJ,
2544        MatGetRow_MPIBAIJ,
2545        MatRestoreRow_MPIBAIJ,
2546        MatMult_MPIBAIJ,
2547 /* 4*/ MatMultAdd_MPIBAIJ,
2548        MatMultTranspose_MPIBAIJ,
2549        MatMultTransposeAdd_MPIBAIJ,
2550        0,
2551        0,
2552        0,
2553 /*10*/ 0,
2554        0,
2555        0,
2556        MatSOR_MPIBAIJ,
2557        MatTranspose_MPIBAIJ,
2558 /*15*/ MatGetInfo_MPIBAIJ,
2559        MatEqual_MPIBAIJ,
2560        MatGetDiagonal_MPIBAIJ,
2561        MatDiagonalScale_MPIBAIJ,
2562        MatNorm_MPIBAIJ,
2563 /*20*/ MatAssemblyBegin_MPIBAIJ,
2564        MatAssemblyEnd_MPIBAIJ,
2565        MatSetOption_MPIBAIJ,
2566        MatZeroEntries_MPIBAIJ,
2567 /*24*/ MatZeroRows_MPIBAIJ,
2568        0,
2569        0,
2570        0,
2571        0,
2572 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2573        0,
2574        0,
2575        0,
2576        0,
2577 /*34*/ MatDuplicate_MPIBAIJ,
2578        0,
2579        0,
2580        0,
2581        0,
2582 /*39*/ MatAXPY_MPIBAIJ,
2583        MatGetSubMatrices_MPIBAIJ,
2584        MatIncreaseOverlap_MPIBAIJ,
2585        MatGetValues_MPIBAIJ,
2586        MatCopy_MPIBAIJ,
2587 /*44*/ 0,
2588        MatScale_MPIBAIJ,
2589        0,
2590        0,
2591        0,
2592 /*49*/ MatSetBlockSize_MPIBAIJ,
2593        0,
2594        0,
2595        0,
2596        0,
2597 /*54*/ MatFDColoringCreate_MPIBAIJ,
2598        0,
2599        MatSetUnfactored_MPIBAIJ,
2600        MatPermute_MPIBAIJ,
2601        MatSetValuesBlocked_MPIBAIJ,
2602 /*59*/ MatGetSubMatrix_MPIBAIJ,
2603        MatDestroy_MPIBAIJ,
2604        MatView_MPIBAIJ,
2605        0,
2606        0,
2607 /*64*/ 0,
2608        0,
2609        0,
2610        0,
2611        0,
2612 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2613        0,
2614        0,
2615        0,
2616        0,
2617 /*74*/ 0,
2618        MatFDColoringApply_BAIJ,
2619        0,
2620        0,
2621        0,
2622 /*79*/ 0,
2623        0,
2624        0,
2625        0,
2626        MatLoad_MPIBAIJ,
2627 /*84*/ 0,
2628        0,
2629        0,
2630        0,
2631        0,
2632 /*89*/ 0,
2633        0,
2634        0,
2635        0,
2636        0,
2637 /*94*/ 0,
2638        0,
2639        0,
2640        0,
2641        0,
2642 /*99*/ 0,
2643        0,
2644        0,
2645        0,
2646        0,
2647 /*104*/0,
2648        MatRealPart_MPIBAIJ,
2649        MatImaginaryPart_MPIBAIJ,
2650        0,
2651        0,
2652 /*109*/0,
2653        0,
2654        0,
2655        0,
2656        0,
2657 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2658        0,
2659        MatGetGhosts_MPIBAIJ,
2660        0,
2661        0,
2662 /*119*/0,
2663        0,
2664        0,
2665        0
2666 };
2667 
2668 EXTERN_C_BEGIN
2669 #undef __FUNCT__
2670 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2671 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2672 {
2673   PetscFunctionBegin;
2674   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2675   *iscopy = PETSC_FALSE;
2676   PetscFunctionReturn(0);
2677 }
2678 EXTERN_C_END
2679 
2680 EXTERN_C_BEGIN
2681 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2682 EXTERN_C_END
2683 
2684 EXTERN_C_BEGIN
2685 #undef __FUNCT__
2686 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2687 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2688 {
2689   PetscInt       m,rstart,cstart,cend;
2690   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2691   const PetscInt *JJ=0;
2692   PetscScalar    *values=0;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696 
2697   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2698   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2699   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2700   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2701   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2702   m      = B->rmap->n/bs;
2703   rstart = B->rmap->rstart/bs;
2704   cstart = B->cmap->rstart/bs;
2705   cend   = B->cmap->rend/bs;
2706 
2707   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2708   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2709   for (i=0; i<m; i++) {
2710     nz = ii[i+1] - ii[i];
2711     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2712     nz_max = PetscMax(nz_max,nz);
2713     JJ  = jj + ii[i];
2714     for (j=0; j<nz; j++) {
2715       if (*JJ >= cstart) break;
2716       JJ++;
2717     }
2718     d = 0;
2719     for (; j<nz; j++) {
2720       if (*JJ++ >= cend) break;
2721       d++;
2722     }
2723     d_nnz[i] = d;
2724     o_nnz[i] = nz - d;
2725   }
2726   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2727   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2728 
2729   values = (PetscScalar*)V;
2730   if (!values) {
2731     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2732     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2733   }
2734   for (i=0; i<m; i++) {
2735     PetscInt          row    = i + rstart;
2736     PetscInt          ncols  = ii[i+1] - ii[i];
2737     const PetscInt    *icols = jj + ii[i];
2738     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2739     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2740   }
2741 
2742   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2743   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2744   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745 
2746   PetscFunctionReturn(0);
2747 }
2748 EXTERN_C_END
2749 
2750 #undef __FUNCT__
2751 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2752 /*@C
2753    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2754    (the default parallel PETSc format).
2755 
2756    Collective on MPI_Comm
2757 
2758    Input Parameters:
2759 +  A - the matrix
2760 .  i - the indices into j for the start of each local row (starts with zero)
2761 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2762 -  v - optional values in the matrix
2763 
2764    Level: developer
2765 
2766 .keywords: matrix, aij, compressed row, sparse, parallel
2767 
2768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2769 @*/
2770 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2771 {
2772   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2773 
2774   PetscFunctionBegin;
2775   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2776   if (f) {
2777     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2778   }
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 EXTERN_C_BEGIN
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2785 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2786 {
2787   Mat_MPIBAIJ    *b;
2788   PetscErrorCode ierr;
2789   PetscInt       i, newbs = PetscAbs(bs);
2790 
2791   PetscFunctionBegin;
2792   if (bs < 0) {
2793     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2794       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2795     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2796     bs   = PetscAbs(bs);
2797   }
2798   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2799   bs = newbs;
2800 
2801 
2802   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2803   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2804   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2805   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2806   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2807 
2808   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2809   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2810   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2811   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2812 
2813   if (d_nnz) {
2814     for (i=0; i<B->rmap->n/bs; i++) {
2815       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2816     }
2817   }
2818   if (o_nnz) {
2819     for (i=0; i<B->rmap->n/bs; i++) {
2820       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2821     }
2822   }
2823 
2824   b = (Mat_MPIBAIJ*)B->data;
2825   b->bs2 = bs*bs;
2826   b->mbs = B->rmap->n/bs;
2827   b->nbs = B->cmap->n/bs;
2828   b->Mbs = B->rmap->N/bs;
2829   b->Nbs = B->cmap->N/bs;
2830 
2831   for (i=0; i<=b->size; i++) {
2832     b->rangebs[i] = B->rmap->range[i]/bs;
2833   }
2834   b->rstartbs = B->rmap->rstart/bs;
2835   b->rendbs   = B->rmap->rend/bs;
2836   b->cstartbs = B->cmap->rstart/bs;
2837   b->cendbs   = B->cmap->rend/bs;
2838 
2839   if (!B->preallocated) {
2840     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2841     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2842     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2843     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2844     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2845     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2846     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2847     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2848     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2849   }
2850 
2851   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2852   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2853   B->preallocated = PETSC_TRUE;
2854   PetscFunctionReturn(0);
2855 }
2856 EXTERN_C_END
2857 
2858 EXTERN_C_BEGIN
2859 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2860 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2861 EXTERN_C_END
2862 
2863 
2864 EXTERN_C_BEGIN
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2867 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2868 {
2869   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2870   PetscErrorCode ierr;
2871   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2872   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2873   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2874 
2875   PetscFunctionBegin;
2876   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2877   ii[0] = 0;
2878   CHKMEMQ;
2879   for (i=0; i<M; i++) {
2880     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2881     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2882     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2883     /* remove one from count of matrix has diagonal */
2884     for (j=id[i]; j<id[i+1]; j++) {
2885       if (jd[j] == i) {ii[i+1]--;break;}
2886     }
2887   CHKMEMQ;
2888   }
2889   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2890   cnt = 0;
2891   for (i=0; i<M; i++) {
2892     for (j=io[i]; j<io[i+1]; j++) {
2893       if (garray[jo[j]] > rstart) break;
2894       jj[cnt++] = garray[jo[j]];
2895   CHKMEMQ;
2896     }
2897     for (k=id[i]; k<id[i+1]; k++) {
2898       if (jd[k] != i) {
2899         jj[cnt++] = rstart + jd[k];
2900   CHKMEMQ;
2901       }
2902     }
2903     for (;j<io[i+1]; j++) {
2904       jj[cnt++] = garray[jo[j]];
2905   CHKMEMQ;
2906     }
2907   }
2908   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2909   PetscFunctionReturn(0);
2910 }
2911 EXTERN_C_END
2912 
2913 EXTERN_C_BEGIN
2914 #if defined(PETSC_HAVE_MUMPS)
2915 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2916 #endif
2917 EXTERN_C_END
2918 
2919 /*MC
2920    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2921 
2922    Options Database Keys:
2923 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2924 . -mat_block_size <bs> - set the blocksize used to store the matrix
2925 - -mat_use_hash_table <fact>
2926 
2927   Level: beginner
2928 
2929 .seealso: MatCreateMPIBAIJ
2930 M*/
2931 
2932 EXTERN_C_BEGIN
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatCreate_MPIBAIJ"
2935 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2936 {
2937   Mat_MPIBAIJ    *b;
2938   PetscErrorCode ierr;
2939   PetscTruth     flg;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2943   B->data = (void*)b;
2944 
2945 
2946   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2947   B->mapping    = 0;
2948   B->assembled  = PETSC_FALSE;
2949 
2950   B->insertmode = NOT_SET_VALUES;
2951   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2952   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2953 
2954   /* build local table of row and column ownerships */
2955   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2956 
2957   /* build cache for off array entries formed */
2958   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2959   b->donotstash  = PETSC_FALSE;
2960   b->colmap      = PETSC_NULL;
2961   b->garray      = PETSC_NULL;
2962   b->roworiented = PETSC_TRUE;
2963 
2964   /* stuff used in block assembly */
2965   b->barray       = 0;
2966 
2967   /* stuff used for matrix vector multiply */
2968   b->lvec         = 0;
2969   b->Mvctx        = 0;
2970 
2971   /* stuff for MatGetRow() */
2972   b->rowindices   = 0;
2973   b->rowvalues    = 0;
2974   b->getrowactive = PETSC_FALSE;
2975 
2976   /* hash table stuff */
2977   b->ht           = 0;
2978   b->hd           = 0;
2979   b->ht_size      = 0;
2980   b->ht_flag      = PETSC_FALSE;
2981   b->ht_fact      = 0;
2982   b->ht_total_ct  = 0;
2983   b->ht_insert_ct = 0;
2984 
2985   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2986     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2987     if (flg) {
2988       PetscReal fact = 1.39;
2989       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2990       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2991       if (fact <= 1.0) fact = 1.39;
2992       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2993       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2994     }
2995   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2996 
2997 #if defined(PETSC_HAVE_MUMPS)
2998   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
2999 #endif
3000   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3001                                      "MatConvert_MPIBAIJ_MPIAdj",
3002                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3003   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3004                                      "MatStoreValues_MPIBAIJ",
3005                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3006   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3007                                      "MatRetrieveValues_MPIBAIJ",
3008                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3009   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3010                                      "MatGetDiagonalBlock_MPIBAIJ",
3011                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3012   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3013                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3014                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3015   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3016 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3017 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3019                                      "MatDiagonalScaleLocal_MPIBAIJ",
3020                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3022                                      "MatSetHashTableFactor_MPIBAIJ",
3023                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 EXTERN_C_END
3028 
3029 /*MC
3030    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3031 
3032    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3033    and MATMPIBAIJ otherwise.
3034 
3035    Options Database Keys:
3036 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3037 
3038   Level: beginner
3039 
3040 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3041 M*/
3042 
3043 EXTERN_C_BEGIN
3044 #undef __FUNCT__
3045 #define __FUNCT__ "MatCreate_BAIJ"
3046 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3047 {
3048   PetscErrorCode ierr;
3049   PetscMPIInt    size;
3050 
3051   PetscFunctionBegin;
3052   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3053   if (size == 1) {
3054     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3055   } else {
3056     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3057   }
3058   PetscFunctionReturn(0);
3059 }
3060 EXTERN_C_END
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3064 /*@C
3065    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3066    (block compressed row).  For good matrix assembly performance
3067    the user should preallocate the matrix storage by setting the parameters
3068    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3069    performance can be increased by more than a factor of 50.
3070 
3071    Collective on Mat
3072 
3073    Input Parameters:
3074 +  A - the matrix
3075 .  bs   - size of blockk
3076 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3077            submatrix  (same for all local rows)
3078 .  d_nnz - array containing the number of block nonzeros in the various block rows
3079            of the in diagonal portion of the local (possibly different for each block
3080            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3081 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3082            submatrix (same for all local rows).
3083 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3084            off-diagonal portion of the local submatrix (possibly different for
3085            each block row) or PETSC_NULL.
3086 
3087    If the *_nnz parameter is given then the *_nz parameter is ignored
3088 
3089    Options Database Keys:
3090 +   -mat_block_size - size of the blocks to use
3091 -   -mat_use_hash_table <fact>
3092 
3093    Notes:
3094    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3095    than it must be used on all processors that share the object for that argument.
3096 
3097    Storage Information:
3098    For a square global matrix we define each processor's diagonal portion
3099    to be its local rows and the corresponding columns (a square submatrix);
3100    each processor's off-diagonal portion encompasses the remainder of the
3101    local matrix (a rectangular submatrix).
3102 
3103    The user can specify preallocated storage for the diagonal part of
3104    the local submatrix with either d_nz or d_nnz (not both).  Set
3105    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3106    memory allocation.  Likewise, specify preallocated storage for the
3107    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3108 
3109    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3110    the figure below we depict these three local rows and all columns (0-11).
3111 
3112 .vb
3113            0 1 2 3 4 5 6 7 8 9 10 11
3114           -------------------
3115    row 3  |  o o o d d d o o o o o o
3116    row 4  |  o o o d d d o o o o o o
3117    row 5  |  o o o d d d o o o o o o
3118           -------------------
3119 .ve
3120 
3121    Thus, any entries in the d locations are stored in the d (diagonal)
3122    submatrix, and any entries in the o locations are stored in the
3123    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3124    stored simply in the MATSEQBAIJ format for compressed row storage.
3125 
3126    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3127    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3128    In general, for PDE problems in which most nonzeros are near the diagonal,
3129    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3130    or you will get TERRIBLE performance; see the users' manual chapter on
3131    matrices.
3132 
3133    You can call MatGetInfo() to get information on how effective the preallocation was;
3134    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3135    You can also run with the option -info and look for messages with the string
3136    malloc in them to see if additional memory allocation was needed.
3137 
3138    Level: intermediate
3139 
3140 .keywords: matrix, block, aij, compressed row, sparse, parallel
3141 
3142 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3143 @*/
3144 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3145 {
3146   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3147 
3148   PetscFunctionBegin;
3149   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3150   if (f) {
3151     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3152   }
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatCreateMPIBAIJ"
3158 /*@C
3159    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3160    (block compressed row).  For good matrix assembly performance
3161    the user should preallocate the matrix storage by setting the parameters
3162    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3163    performance can be increased by more than a factor of 50.
3164 
3165    Collective on MPI_Comm
3166 
3167    Input Parameters:
3168 +  comm - MPI communicator
3169 .  bs   - size of blockk
3170 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3171            This value should be the same as the local size used in creating the
3172            y vector for the matrix-vector product y = Ax.
3173 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3174            This value should be the same as the local size used in creating the
3175            x vector for the matrix-vector product y = Ax.
3176 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3177 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3178 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3179            submatrix  (same for all local rows)
3180 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3181            of the in diagonal portion of the local (possibly different for each block
3182            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3183 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3184            submatrix (same for all local rows).
3185 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3186            off-diagonal portion of the local submatrix (possibly different for
3187            each block row) or PETSC_NULL.
3188 
3189    Output Parameter:
3190 .  A - the matrix
3191 
3192    Options Database Keys:
3193 +   -mat_block_size - size of the blocks to use
3194 -   -mat_use_hash_table <fact>
3195 
3196    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3197    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3198    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3199 
3200    Notes:
3201    If the *_nnz parameter is given then the *_nz parameter is ignored
3202 
3203    A nonzero block is any block that as 1 or more nonzeros in it
3204 
3205    The user MUST specify either the local or global matrix dimensions
3206    (possibly both).
3207 
3208    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3209    than it must be used on all processors that share the object for that argument.
3210 
3211    Storage Information:
3212    For a square global matrix we define each processor's diagonal portion
3213    to be its local rows and the corresponding columns (a square submatrix);
3214    each processor's off-diagonal portion encompasses the remainder of the
3215    local matrix (a rectangular submatrix).
3216 
3217    The user can specify preallocated storage for the diagonal part of
3218    the local submatrix with either d_nz or d_nnz (not both).  Set
3219    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3220    memory allocation.  Likewise, specify preallocated storage for the
3221    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3222 
3223    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3224    the figure below we depict these three local rows and all columns (0-11).
3225 
3226 .vb
3227            0 1 2 3 4 5 6 7 8 9 10 11
3228           -------------------
3229    row 3  |  o o o d d d o o o o o o
3230    row 4  |  o o o d d d o o o o o o
3231    row 5  |  o o o d d d o o o o o o
3232           -------------------
3233 .ve
3234 
3235    Thus, any entries in the d locations are stored in the d (diagonal)
3236    submatrix, and any entries in the o locations are stored in the
3237    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3238    stored simply in the MATSEQBAIJ format for compressed row storage.
3239 
3240    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3241    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3242    In general, for PDE problems in which most nonzeros are near the diagonal,
3243    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3244    or you will get TERRIBLE performance; see the users' manual chapter on
3245    matrices.
3246 
3247    Level: intermediate
3248 
3249 .keywords: matrix, block, aij, compressed row, sparse, parallel
3250 
3251 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3252 @*/
3253 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3254 {
3255   PetscErrorCode ierr;
3256   PetscMPIInt    size;
3257 
3258   PetscFunctionBegin;
3259   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3260   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3261   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3262   if (size > 1) {
3263     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3264     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3265   } else {
3266     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3267     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3268   }
3269   PetscFunctionReturn(0);
3270 }
3271 
3272 #undef __FUNCT__
3273 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3274 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3275 {
3276   Mat            mat;
3277   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3278   PetscErrorCode ierr;
3279   PetscInt       len=0;
3280 
3281   PetscFunctionBegin;
3282   *newmat       = 0;
3283   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3284   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3285   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3286   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3287 
3288   mat->factortype   = matin->factortype;
3289   mat->preallocated = PETSC_TRUE;
3290   mat->assembled    = PETSC_TRUE;
3291   mat->insertmode   = NOT_SET_VALUES;
3292 
3293   a      = (Mat_MPIBAIJ*)mat->data;
3294   mat->rmap->bs  = matin->rmap->bs;
3295   a->bs2   = oldmat->bs2;
3296   a->mbs   = oldmat->mbs;
3297   a->nbs   = oldmat->nbs;
3298   a->Mbs   = oldmat->Mbs;
3299   a->Nbs   = oldmat->Nbs;
3300 
3301   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3302   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3303 
3304   a->size         = oldmat->size;
3305   a->rank         = oldmat->rank;
3306   a->donotstash   = oldmat->donotstash;
3307   a->roworiented  = oldmat->roworiented;
3308   a->rowindices   = 0;
3309   a->rowvalues    = 0;
3310   a->getrowactive = PETSC_FALSE;
3311   a->barray       = 0;
3312   a->rstartbs     = oldmat->rstartbs;
3313   a->rendbs       = oldmat->rendbs;
3314   a->cstartbs     = oldmat->cstartbs;
3315   a->cendbs       = oldmat->cendbs;
3316 
3317   /* hash table stuff */
3318   a->ht           = 0;
3319   a->hd           = 0;
3320   a->ht_size      = 0;
3321   a->ht_flag      = oldmat->ht_flag;
3322   a->ht_fact      = oldmat->ht_fact;
3323   a->ht_total_ct  = 0;
3324   a->ht_insert_ct = 0;
3325 
3326   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3327   if (oldmat->colmap) {
3328 #if defined (PETSC_USE_CTABLE)
3329   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3330 #else
3331   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3332   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3333   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3334 #endif
3335   } else a->colmap = 0;
3336 
3337   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3338     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3339     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3340     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3341   } else a->garray = 0;
3342 
3343   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3344   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3345   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3346   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3347   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3348 
3349   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3351   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3352   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3353   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3354   *newmat = mat;
3355 
3356   PetscFunctionReturn(0);
3357 }
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatLoad_MPIBAIJ"
3361 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3362 {
3363   PetscErrorCode ierr;
3364   int            fd;
3365   PetscInt       i,nz,j,rstart,rend;
3366   PetscScalar    *vals,*buf;
3367   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3368   MPI_Status     status;
3369   PetscMPIInt    rank,size,maxnz;
3370   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3371   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3372   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3373   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3374   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3375   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3376 
3377   PetscFunctionBegin;
3378   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3379     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3380   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3381 
3382   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3383   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3384   if (!rank) {
3385     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3386     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3387     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3388   }
3389 
3390   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3391 
3392   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3393   M = header[1]; N = header[2];
3394 
3395   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3396   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3397   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3398 
3399   /* If global sizes are set, check if they are consistent with that given in the file */
3400   if (sizesset) {
3401     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3402   }
3403   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3404   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3405 
3406   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3407 
3408   /*
3409      This code adds extra rows to make sure the number of rows is
3410      divisible by the blocksize
3411   */
3412   Mbs        = M/bs;
3413   extra_rows = bs - M + bs*Mbs;
3414   if (extra_rows == bs) extra_rows = 0;
3415   else                  Mbs++;
3416   if (extra_rows && !rank) {
3417     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3418   }
3419 
3420   /* determine ownership of all rows */
3421   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3422     mbs        = Mbs/size + ((Mbs % size) > rank);
3423     m          = mbs*bs;
3424   } else { /* User set */
3425     m          = newmat->rmap->n;
3426     mbs        = m/bs;
3427   }
3428   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3429   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3430 
3431   /* process 0 needs enough room for process with most rows */
3432   if (!rank) {
3433     mmax = rowners[1];
3434     for (i=2; i<size; i++) {
3435       mmax = PetscMax(mmax,rowners[i]);
3436     }
3437     mmax*=bs;
3438   } else mmax = m;
3439 
3440   rowners[0] = 0;
3441   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3442   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3443   rstart = rowners[rank];
3444   rend   = rowners[rank+1];
3445 
3446   /* distribute row lengths to all processors */
3447   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3448   if (!rank) {
3449     mend = m;
3450     if (size == 1) mend = mend - extra_rows;
3451     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3452     for (j=mend; j<m; j++) locrowlens[j] = 1;
3453     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3454     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3455     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3456     for (j=0; j<m; j++) {
3457       procsnz[0] += locrowlens[j];
3458     }
3459     for (i=1; i<size; i++) {
3460       mend = browners[i+1] - browners[i];
3461       if (i == size-1) mend = mend - extra_rows;
3462       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3463       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3464       /* calculate the number of nonzeros on each processor */
3465       for (j=0; j<browners[i+1]-browners[i]; j++) {
3466         procsnz[i] += rowlengths[j];
3467       }
3468       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3469     }
3470     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3471   } else {
3472     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3473   }
3474 
3475   if (!rank) {
3476     /* determine max buffer needed and allocate it */
3477     maxnz = procsnz[0];
3478     for (i=1; i<size; i++) {
3479       maxnz = PetscMax(maxnz,procsnz[i]);
3480     }
3481     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3482 
3483     /* read in my part of the matrix column indices  */
3484     nz     = procsnz[0];
3485     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3486     mycols = ibuf;
3487     if (size == 1)  nz -= extra_rows;
3488     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3489     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3490 
3491     /* read in every ones (except the last) and ship off */
3492     for (i=1; i<size-1; i++) {
3493       nz   = procsnz[i];
3494       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3495       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3496     }
3497     /* read in the stuff for the last proc */
3498     if (size != 1) {
3499       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3500       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3501       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3502       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3503     }
3504     ierr = PetscFree(cols);CHKERRQ(ierr);
3505   } else {
3506     /* determine buffer space needed for message */
3507     nz = 0;
3508     for (i=0; i<m; i++) {
3509       nz += locrowlens[i];
3510     }
3511     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3512     mycols = ibuf;
3513     /* receive message of column indices*/
3514     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3515     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3516     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3517   }
3518 
3519   /* loop over local rows, determining number of off diagonal entries */
3520   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3521   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3522   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3523   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3524   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3525   rowcount = 0; nzcount = 0;
3526   for (i=0; i<mbs; i++) {
3527     dcount  = 0;
3528     odcount = 0;
3529     for (j=0; j<bs; j++) {
3530       kmax = locrowlens[rowcount];
3531       for (k=0; k<kmax; k++) {
3532         tmp = mycols[nzcount++]/bs;
3533         if (!mask[tmp]) {
3534           mask[tmp] = 1;
3535           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3536           else masked1[dcount++] = tmp;
3537         }
3538       }
3539       rowcount++;
3540     }
3541 
3542     dlens[i]  = dcount;
3543     odlens[i] = odcount;
3544 
3545     /* zero out the mask elements we set */
3546     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3547     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3548   }
3549 
3550 
3551   if (!sizesset) {
3552     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3553   }
3554   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3555 
3556   if (!rank) {
3557     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3558     /* read in my part of the matrix numerical values  */
3559     nz = procsnz[0];
3560     vals = buf;
3561     mycols = ibuf;
3562     if (size == 1)  nz -= extra_rows;
3563     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3564     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3565 
3566     /* insert into matrix */
3567     jj      = rstart*bs;
3568     for (i=0; i<m; i++) {
3569       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3570       mycols += locrowlens[i];
3571       vals   += locrowlens[i];
3572       jj++;
3573     }
3574     /* read in other processors (except the last one) and ship out */
3575     for (i=1; i<size-1; i++) {
3576       nz   = procsnz[i];
3577       vals = buf;
3578       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3579       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3580     }
3581     /* the last proc */
3582     if (size != 1){
3583       nz   = procsnz[i] - extra_rows;
3584       vals = buf;
3585       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3586       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3587       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3588     }
3589     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3590   } else {
3591     /* receive numeric values */
3592     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3593 
3594     /* receive message of values*/
3595     vals   = buf;
3596     mycols = ibuf;
3597     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3598     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3599     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3600 
3601     /* insert into matrix */
3602     jj      = rstart*bs;
3603     for (i=0; i<m; i++) {
3604       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3605       mycols += locrowlens[i];
3606       vals   += locrowlens[i];
3607       jj++;
3608     }
3609   }
3610   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3611   ierr = PetscFree(buf);CHKERRQ(ierr);
3612   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3613   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3614   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3615   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3616   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618 
3619   PetscFunctionReturn(0);
3620 }
3621 
3622 #undef __FUNCT__
3623 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3624 /*@
3625    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3626 
3627    Input Parameters:
3628 .  mat  - the matrix
3629 .  fact - factor
3630 
3631    Not Collective, each process can use a different factor
3632 
3633    Level: advanced
3634 
3635   Notes:
3636    This can also be set by the command line option: -mat_use_hash_table <fact>
3637 
3638 .keywords: matrix, hashtable, factor, HT
3639 
3640 .seealso: MatSetOption()
3641 @*/
3642 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3643 {
3644   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3645 
3646   PetscFunctionBegin;
3647   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3648   if (f) {
3649     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3650   }
3651   PetscFunctionReturn(0);
3652 }
3653 
3654 EXTERN_C_BEGIN
3655 #undef __FUNCT__
3656 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3657 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3658 {
3659   Mat_MPIBAIJ *baij;
3660 
3661   PetscFunctionBegin;
3662   baij = (Mat_MPIBAIJ*)mat->data;
3663   baij->ht_fact = fact;
3664   PetscFunctionReturn(0);
3665 }
3666 EXTERN_C_END
3667 
3668 #undef __FUNCT__
3669 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3670 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3671 {
3672   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3673   PetscFunctionBegin;
3674   *Ad     = a->A;
3675   *Ao     = a->B;
3676   *colmap = a->garray;
3677   PetscFunctionReturn(0);
3678 }
3679 
3680 /*
3681     Special version for direct calls from Fortran (to eliminate two function call overheads
3682 */
3683 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3684 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3685 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3686 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3687 #endif
3688 
3689 #undef __FUNCT__
3690 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3691 /*@C
3692   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3693 
3694   Collective on Mat
3695 
3696   Input Parameters:
3697 + mat - the matrix
3698 . min - number of input rows
3699 . im - input rows
3700 . nin - number of input columns
3701 . in - input columns
3702 . v - numerical values input
3703 - addvin - INSERT_VALUES or ADD_VALUES
3704 
3705   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3706 
3707   Level: advanced
3708 
3709 .seealso:   MatSetValuesBlocked()
3710 @*/
3711 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3712 {
3713   /* convert input arguments to C version */
3714   Mat             mat = *matin;
3715   PetscInt        m = *min, n = *nin;
3716   InsertMode      addv = *addvin;
3717 
3718   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3719   const MatScalar *value;
3720   MatScalar       *barray=baij->barray;
3721   PetscTruth      roworiented = baij->roworiented;
3722   PetscErrorCode  ierr;
3723   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3724   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3725   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3726 
3727   PetscFunctionBegin;
3728   /* tasks normally handled by MatSetValuesBlocked() */
3729   if (mat->insertmode == NOT_SET_VALUES) {
3730     mat->insertmode = addv;
3731   }
3732 #if defined(PETSC_USE_DEBUG)
3733   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3734   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3735 #endif
3736   if (mat->assembled) {
3737     mat->was_assembled = PETSC_TRUE;
3738     mat->assembled     = PETSC_FALSE;
3739   }
3740   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3741 
3742 
3743   if(!barray) {
3744     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3745     baij->barray = barray;
3746   }
3747 
3748   if (roworiented) {
3749     stepval = (n-1)*bs;
3750   } else {
3751     stepval = (m-1)*bs;
3752   }
3753   for (i=0; i<m; i++) {
3754     if (im[i] < 0) continue;
3755 #if defined(PETSC_USE_DEBUG)
3756     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3757 #endif
3758     if (im[i] >= rstart && im[i] < rend) {
3759       row = im[i] - rstart;
3760       for (j=0; j<n; j++) {
3761         /* If NumCol = 1 then a copy is not required */
3762         if ((roworiented) && (n == 1)) {
3763           barray = (MatScalar*)v + i*bs2;
3764         } else if((!roworiented) && (m == 1)) {
3765           barray = (MatScalar*)v + j*bs2;
3766         } else { /* Here a copy is required */
3767           if (roworiented) {
3768             value = v + i*(stepval+bs)*bs + j*bs;
3769           } else {
3770             value = v + j*(stepval+bs)*bs + i*bs;
3771           }
3772           for (ii=0; ii<bs; ii++,value+=stepval) {
3773             for (jj=0; jj<bs; jj++) {
3774               *barray++  = *value++;
3775             }
3776           }
3777           barray -=bs2;
3778         }
3779 
3780         if (in[j] >= cstart && in[j] < cend){
3781           col  = in[j] - cstart;
3782           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3783         }
3784         else if (in[j] < 0) continue;
3785 #if defined(PETSC_USE_DEBUG)
3786         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3787 #endif
3788         else {
3789           if (mat->was_assembled) {
3790             if (!baij->colmap) {
3791               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3792             }
3793 
3794 #if defined(PETSC_USE_DEBUG)
3795 #if defined (PETSC_USE_CTABLE)
3796             { PetscInt data;
3797               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3798               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3799             }
3800 #else
3801             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3802 #endif
3803 #endif
3804 #if defined (PETSC_USE_CTABLE)
3805 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3806             col  = (col - 1)/bs;
3807 #else
3808             col = (baij->colmap[in[j]] - 1)/bs;
3809 #endif
3810             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3811               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3812               col =  in[j];
3813             }
3814           }
3815           else col = in[j];
3816           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3817         }
3818       }
3819     } else {
3820       if (!baij->donotstash) {
3821         if (roworiented) {
3822           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3823         } else {
3824           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3825         }
3826       }
3827     }
3828   }
3829 
3830   /* task normally handled by MatSetValuesBlocked() */
3831   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3832   PetscFunctionReturn(0);
3833 }
3834