xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0807856d2db3ca9b1c9d7e6c6c10311432e06a41)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non scalable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
109 
/*
   Inserts/adds one scalar at local point indices (row,col) of the DIAGONAL
   sequential block A.  This is a textual macro expanded inside
   MatSetValues_MPIBAIJ(); it relies on these names being in scope there:
   a, A, aa, ai, aj, aimax, ailen, bs, bs2, ierr and the scratch variables
   rp, ap, bap, rmax, nrow, brow, bcol, ridx, cidx, low, high, t, _i, ii, N.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  /* block row holding this point row */ \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; /* col indices / values of the block row */ \
    rmax = aimax[brow]; nrow = ailen[brow]; /* allocated vs currently-used blocks */ \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; /* position inside the bs x bs block */ \
      low = 0; high = nrow; \
      /* binary search narrows to a window of <= 3, then scan linearly */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block exists: update entry (column-major layout within a block) */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      /* block absent: honor the nonew flags before inserting a new one */ \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      /* zero the freed slot before writing the single new entry */ \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
148 
/*
   Same as MatSetValues_SeqBAIJ_A_Private() but for the OFF-DIAGONAL
   sequential block B; expanded inside MatSetValues_MPIBAIJ() and relies on
   the names b, B, ba, bi, bj, bimax, bilen plus the shared scratch variables
   being in scope there.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  /* block row holding this point row */ \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; /* col indices / values of the block row */ \
    rmax = bimax[brow]; nrow = bilen[brow]; /* allocated vs currently-used blocks */ \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; /* position inside the bs x bs block */ \
      low = 0; high = nrow; \
      /* binary search narrows to a window of <= 3, then scan linearly */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block exists: update entry (column-major layout within a block) */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      /* block absent: honor the nonew flags before inserting a new one */ \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;/* NOTE(review): leftover memory-corruption probe, absent from the A variant; consider removing for speed */\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      /* zero the freed slot before writing the single new entry */ \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
187 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   Inserts/adds values at global POINT (non-blocked) indices im[] x in[].
   Locally owned rows are routed to the diagonal (A) or off-diagonal (B)
   sequential block via the inlined macros above; off-process rows are
   stashed for exchange during assembly.  Negative indices are skipped.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;                 /* ownership range in point units */
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  /* scratch variables consumed by MatSetValues_SeqBAIJ_{A,B}_Private */
  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;                        /* local point row */
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal block */
          if (mat->was_assembled) {
            /* translate global -> compressed local column via colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column block was not in the previous nonzero pattern:
                 fall back to the unassembled (uncompressed) form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;                     /* add offset within the block */
          } else col = in[j];                           /* not yet assembled: global index */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
274 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   Inserts/adds values at global BLOCK indices im[] x in[]; v holds m*n
   blocks of bs*bs values each.  Each block is copied (if necessary) into
   the column-major scratch buffer baij->barray before being handed to the
   sequential MatSetValuesBlocked_SeqBAIJ on A or B.  Off-process block rows
   are stashed for exchange during assembly.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;   /* block-row ownership range */
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs x bs scratch block shared across calls */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
	    value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;    /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column falls in the diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
            /* sanity: colmap entries must be bs-aligned (point offset + 1) */
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
            /* translate global block column -> compressed local block column */
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new block column: revert to the unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];    /* not yet assembled: global block index */
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
379 
/* Golden-ratio multiplicative hash mapping key into [0,size).  tmp must be a
   PetscReal lvalue supplied by the caller; it receives (key)*HASH_KEY and the
   fractional part of tmp selects the slot.  key is evaluated exactly once. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues_MPIBAIJ(): im[]/in[] are global POINT
   indices.  Each (block row, block col) pair is located by probing the hash
   table built by MatCreateHashTable_MPIBAIJ_Private(); no new nonzero blocks
   may be created here — a miss is an error.  Off-process rows are stashed.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;          /* ownership range in point units */
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  /* probe-length statistics, maintained only in debug builds */
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the (block row, block col) key in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        /* linear probe from h1, wrapping around once; count probes */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probe from h1, wrapping around once */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address
           (column-major layout within the bs x bs block) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for assembly communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscBool         roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up into the Hash Table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507        if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data --;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622  PetscFunctionReturn(0);
623 }
624 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   Computes a norm of the parallel matrix.  Supports NORM_FROBENIUS,
   NORM_1 (max column sum of absolute values) and NORM_INFINITY (max row
   sum); each local contribution is combined across processes with a
   reduction.  On a single process the work is delegated to the sequential
   block.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: whole matrix is in the diagonal block */
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the raw value arrays of A and B, then reduce */
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): uses rstartbs as the diagonal block's column offset;
         this presumes the row and column block ownership ranges coincide —
         verify for non-square layouts */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* accumulate |a_ij| per global column; v walks the bs2-sized blocks
         column by column, garray maps B's block columns to global numbering */
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      /* reduce the per-column sums, then take the maximum */
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      /* for each block row, accumulate |a_ij| per point row over A and B,
         then fold into the running local maximum */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* row sums are complete locally (rows are not split), so MAX reduces */
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
726 
/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
/*
   Builds the open-addressing (linear probing) hash table used by the *_HT
   MatSetValues variants.  Keys are global (block row, block col) pairs
   encoded as row*Nbs + col + 1 (0 marks an empty slot); the value stored in
   HD is the address of the corresponding bs2-sized block in A or B.
   factor scales the table size relative to the nonzero-block count.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;   /* collision count / longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);   /* already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A: insert every diagonal-block nonzero block.  Keys use
     global block indices; cstart shifts A's local block columns. */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probe (wrapping) for the first empty slot */
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: same, with garray mapping B's compressed block columns
     to global numbering */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
822 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   Starts the (collective) assembly: verifies all ranks used the same insert
   mode and launches the scatters that ship stashed off-process entries
   (both the point stash and the block stash) to their owning ranks.
   Does nothing if off-process entries are not being stashed.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* BOR of the modes: mixing ADD_VALUES and INSERT_VALUES across ranks
     yields their bitwise union, which is detected below as an error */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* ship point-stash entries (row ranges) and block-stash entries (block-row ranges) */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
850 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly: drains the point and block stashes into the local
   parts via MatSetValues(Blocked)_MPIBAIJ, assembles the diagonal (A) and
   off-diagonal (B) sequential matrices, handles collective disassembly if
   any rank disassembled, builds the multiply scatter on first assembly, and
   optionally installs the hash-table MatSetValues path.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each received message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values: each column index owns a bs2-sized block of scalars */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    /* product over ranks of was_assembled: 0 if any rank was never assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);  /* builds lvec/Mvctx/garray */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested (ht_flag) switch to hash-table based value insertion */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release MatGetRow() workspace; it is lazily reallocated when needed */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscBool         iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscBool  isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1061       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084 
1085   PetscFunctionBegin;
1086   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1087   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1088   nz   = bs2*(A->nz + B->nz);
1089   rlen = mat->rmap->n;
1090   if (!rank) {
1091     header[0] = MAT_FILE_CLASSID;
1092     header[1] = mat->rmap->N;
1093     header[2] = mat->cmap->N;
1094     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1095     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1096     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1097     /* get largest number of rows any processor has */
1098     range = mat->rmap->range;
1099     for (i=1; i<size; i++) {
1100       rlen = PetscMax(rlen,range[i+1] - range[i]);
1101     }
1102   } else {
1103     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1104   }
1105 
1106   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1107   /* compute lengths of each row  */
1108   count = 0;
1109   for (i=0; i<a->mbs; i++) {
1110     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1111   }
1112   /* store the row lengths to the file */
1113   if (!rank) {
1114     MPI_Status status;
1115     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1116     rlen  = (range[1] - range[0])/bs;
1117     for (i=0; i<rlen; i++) {
1118       for (j=0; j<bs; j++) {
1119         row_lens[i*bs+j] = bs*crow_lens[i];
1120       }
1121     }
1122     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1123     for (i=1; i<size; i++) {
1124       rlen = (range[i+1] - range[i])/bs;
1125       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1126       for (k=0; k<rlen; k++) {
1127 	for (j=0; j<bs; j++) {
1128 	  row_lens[k*bs+j] = bs*crow_lens[k];
1129 	}
1130       }
1131       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1132     }
1133     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1134   } else {
1135     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1136   }
1137   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1138 
1139   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1140      information needed to make it for each row from a block row. This does require more communication but still not more than
1141      the communication needed for the nonzero values  */
1142   nzmax = nz; /*  space a largest processor needs */
1143   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1144   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1145   cnt  = 0;
1146   for (i=0; i<a->mbs; i++) {
1147     pcnt = cnt;
1148     for (j=B->i[i]; j<B->i[i+1]; j++) {
1149       if ( (col = garray[B->j[j]]) > cstart) break;
1150       for (l=0; l<bs; l++) {
1151 	column_indices[cnt++] = bs*col+l;
1152       }
1153     }
1154     for (k=A->i[i]; k<A->i[i+1]; k++) {
1155       for (l=0; l<bs; l++) {
1156         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1157       }
1158     }
1159     for (; j<B->i[i+1]; j++) {
1160       for (l=0; l<bs; l++) {
1161         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1162       }
1163     }
1164     len = cnt - pcnt;
1165     for (k=1; k<bs; k++) {
1166       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1167       cnt += len;
1168     }
1169   }
1170   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1171 
1172   /* store the columns to the file */
1173   if (!rank) {
1174     MPI_Status status;
1175     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     for (i=1; i<size; i++) {
1177       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1178       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1179       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     }
1181   } else {
1182     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1183     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1184   }
1185   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1186 
1187   /* load up the numerical values */
1188   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1189   cnt = 0;
1190   for (i=0; i<a->mbs; i++) {
1191     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1192     for (j=B->i[i]; j<B->i[i+1]; j++) {
1193       if ( garray[B->j[j]] > cstart) break;
1194       for (l=0; l<bs; l++) {
1195         for (ll=0; ll<bs; ll++) {
1196 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1197         }
1198       }
1199       cnt += bs;
1200     }
1201     for (k=A->i[i]; k<A->i[i+1]; k++) {
1202       for (l=0; l<bs; l++) {
1203         for (ll=0; ll<bs; ll++) {
1204           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1205         }
1206       }
1207       cnt += bs;
1208     }
1209     for (; j<B->i[i+1]; j++) {
1210       for (l=0; l<bs; l++) {
1211         for (ll=0; ll<bs; ll++) {
1212 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1213         }
1214       }
1215       cnt += bs;
1216     }
1217     cnt += (bs-1)*rlen;
1218   }
1219   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1220 
1221   /* store the column values to the file */
1222   if (!rank) {
1223     MPI_Status status;
1224     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1227       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1228       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1229     }
1230   } else {
1231     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1232     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_values);CHKERRQ(ierr);
1235 
1236   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1237   if (file) {
1238     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_MPIBAIJ"
1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscBool      iascii,isdraw,issocket,isbinary;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1253   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1254   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1255   if (iascii || isdraw || issocket) {
1256     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1259   } else {
1260     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   Destroys all MPIBAIJ-private storage: the stashes, the sequential A and B
   parts, the column map, scatter data, work arrays and the hash table, then
   frees the data structure itself and removes the composed functions that
   were attached at creation time.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
  /* colmap is a PetscTable or a plain array depending on the build */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* detach the type-specific functions composed onto the object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   yy = A^T*xx.  The off-diagonal transpose product B^T*xx is computed into
   the local work vector and scattered back (reverse, additive) to the owning
   ranks.  The ordering of the local multiply relative to the scatter
   Begin/End depends on whether the scatter's send and receive are "merged"
   (see VecScatterGetMerged()).
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted in yy until the next line, which is true for my implementation*/
1387   /* but is not perhaps always true. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally owned (point) row of the matrix: the rows of the
   diagonal (A) and off-diagonal (B) parts are merged into a single array of
   global column indices and values, sorted by increasing column.  Workspace
   large enough for the longest row is allocated lazily on first use and
   reused until freed at assembly or destruction.  Must be paired with
   MatRestoreRow_MPIBAIJ().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* tmp = number of stored blocks in block row i (A part + B part) */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;    /* row local to this rank */

  /* only request from A/B what the caller asked for (NULL suppresses output) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;     /* local-to-global block-column map for B */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global block column lies
         left of the diagonal block; B entries split around the A entries */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while filling the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size     = (PetscReal)matin->rmap->bs;
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1542   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1543   isend[3] += info->memory;  isend[4] += info->mallocs;
1544   if (flag == MAT_LOCAL) {
1545     info->nz_used      = isend[0];
1546     info->nz_allocated = isend[1];
1547     info->nz_unneeded  = isend[2];
1548     info->memory       = isend[3];
1549     info->mallocs      = isend[4];
1550   } else if (flag == MAT_GLOBAL_MAX) {
1551     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1552     info->nz_used      = irecv[0];
1553     info->nz_allocated = irecv[1];
1554     info->nz_unneeded  = irecv[2];
1555     info->memory       = irecv[3];
1556     info->mallocs      = irecv[4];
1557   } else if (flag == MAT_GLOBAL_SUM) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else {
1565     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1566   }
1567   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1568   info->fill_ratio_needed = 0;
1569   info->factor_mallocs    = 0;
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1575 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1576 {
1577   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   switch (op) {
1582   case MAT_NEW_NONZERO_LOCATIONS:
1583   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1584   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1585   case MAT_KEEP_NONZERO_PATTERN:
1586   case MAT_NEW_NONZERO_LOCATION_ERR:
1587     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1588     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1589     break;
1590   case MAT_ROW_ORIENTED:
1591     a->roworiented = flg;
1592     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1593     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1594     break;
1595   case MAT_NEW_DIAGONALS:
1596     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1597     break;
1598   case MAT_IGNORE_OFF_PROC_ENTRIES:
1599     a->donotstash = flg;
1600     break;
1601   case MAT_USE_HASH_TABLE:
1602     a->ht_flag = flg;
1603     break;
1604   case MAT_SYMMETRIC:
1605   case MAT_STRUCTURALLY_SYMMETRIC:
1606   case MAT_HERMITIAN:
1607   case MAT_SYMMETRY_ETERNAL:
1608     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1609     break;
1610   default:
1611     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1612   }
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1618 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1619 {
1620   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1621   Mat_SeqBAIJ    *Aloc;
1622   Mat            B;
1623   PetscErrorCode ierr;
1624   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1625   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1626   MatScalar      *a;
1627 
1628   PetscFunctionBegin;
1629   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1630   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1631     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1632     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1633     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1634     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1635   } else {
1636     B = *matout;
1637   }
1638 
1639   /* copy over the A part */
1640   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1641   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1642   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1643 
1644   for (i=0; i<mbs; i++) {
1645     rvals[0] = bs*(baij->rstartbs + i);
1646     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1647     for (j=ai[i]; j<ai[i+1]; j++) {
1648       col = (baij->cstartbs+aj[j])*bs;
1649       for (k=0; k<bs; k++) {
1650         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1651         col++; a += bs;
1652       }
1653     }
1654   }
1655   /* copy over the B part */
1656   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1657   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658   for (i=0; i<mbs; i++) {
1659     rvals[0] = bs*(baij->rstartbs + i);
1660     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1661     for (j=ai[i]; j<ai[i+1]; j++) {
1662       col = baij->garray[aj[j]]*bs;
1663       for (k=0; k<bs; k++) {
1664         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1665         col++; a += bs;
1666       }
1667     }
1668   }
1669   ierr = PetscFree(rvals);CHKERRQ(ierr);
1670   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672 
1673   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1674     *matout = B;
1675   } else {
1676     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   Computes mat = diag(ll) * mat * diag(rr); either vector may be PETSC_NULL.
   ll must conform to the local row layout, rr to the local column layout.
   The scatter of rr into the ghosted work vector lvec is started first so
   that communication overlaps the scaling of the diagonal block.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling needs only local row data, so B can be scaled while the
       scatter of rr is still in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1714 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   Zeros the given global (point, not block) rows of the matrix and
   optionally places diag on the diagonal of each zeroed row.  Rows may be
   specified on any process: each index is first routed to its owning
   process with point-to-point MPI messages, then every process zaps its
   own local rows.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j]   = number of rows destined for process j
     nprocs[2*j+1] = 1 iff a message will be sent to process j */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* resume the owner search from the last hit; restart only when the row
       indices are not in ascending order */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    /* user promised all rows are local, so skip the global handshake */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* bucket the row indices by destination; starts[] is consumed here */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] (it was advanced above) before issuing the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs;
  slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows[] collects all rows this process owns, shifted to local numbering */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: insert the diagonal entries one at a time */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1873 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1874 {
1875   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1876   PetscErrorCode ierr;
1877 
1878   PetscFunctionBegin;
1879   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatEqual_MPIBAIJ"
1887 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1888 {
1889   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1890   Mat            a,b,c,d;
1891   PetscBool      flg;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   a = matA->A; b = matA->B;
1896   c = matB->A; d = matB->B;
1897 
1898   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1899   if (flg) {
1900     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1901   }
1902   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatCopy_MPIBAIJ"
1908 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1909 {
1910   PetscErrorCode ierr;
1911   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1912   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1913 
1914   PetscFunctionBegin;
1915   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1916   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1917     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1918   } else {
1919     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1920     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1921   }
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1927 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1938 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1939 {
1940   PetscErrorCode ierr;
1941   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1942   PetscBLASInt   bnz,one=1;
1943   Mat_SeqBAIJ    *x,*y;
1944 
1945   PetscFunctionBegin;
1946   if (str == SAME_NONZERO_PATTERN) {
1947     PetscScalar alpha = a;
1948     x = (Mat_SeqBAIJ *)xx->A->data;
1949     y = (Mat_SeqBAIJ *)yy->A->data;
1950     bnz = PetscBLASIntCast(x->nz);
1951     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1952     x = (Mat_SeqBAIJ *)xx->B->data;
1953     y = (Mat_SeqBAIJ *)yy->B->data;
1954     bnz = PetscBLASIntCast(x->nz);
1955     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1956   } else {
1957     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1964 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1965 {
1966   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1967   PetscInt rbs,cbs;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1972   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1973   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1974   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1975   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1976   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1982 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1983 {
1984   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1985   PetscErrorCode ierr;
1986 
1987   PetscFunctionBegin;
1988   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1989   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1995 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1996 {
1997   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1998   PetscErrorCode ierr;
1999 
2000   PetscFunctionBegin;
2001   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2002   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2008 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2009 {
2010   PetscErrorCode ierr;
2011   IS             iscol_local;
2012   PetscInt       csize;
2013 
2014   PetscFunctionBegin;
2015   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2016   if (call == MAT_REUSE_MATRIX) {
2017     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2018     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2019   } else {
2020     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2021   }
2022   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2023   if (call == MAT_INITIAL_MATRIX) {
2024     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2025     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2026   }
2027   PetscFunctionReturn(0);
2028 }
2029 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()

    iscol must already be gathered (identical on every process); csize is
  the requested number of local columns of the result (in points, not
  blocks) or PETSC_DECIDE.
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqBAIJ    *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the first call is stashed on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert from point sizes to block sizes */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
	nlocal = m;
      } else {
        /* spread the columns as evenly as possible across processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* block columns outside [rstart,rend) land in the off-diagonal part */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is a point-row count while m was divided by bs above;
       this comparison looks bs-dependent -- confirm for bs > 1 */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy the sequential submatrix block row by block row into the result */
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2159 
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
/*
   Computes B = P^T * A * Q for row permutation rowp and column permutation
   colp.  The row permutation is gathered globally, inverted, and reduced to
   the local target rows; the column permutation is handled purely locally.
   The permuted matrix is then extracted with MatGetSubMatrix_MPIBAIJ_Private().
*/
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  /* NOTE(review): raw handle comparison of communicators; MPI_Comm_compare
     would also recognize congruent communicators -- confirm intent */
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    /* re-create the IS on the matrix communicator */
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    /* crowp was created above only in this case */
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  /* take this process's slice of the inverted global permutation */
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    /* release the indices and the temporary sequential copy made above */
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2225 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2226 {
2227   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2228   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2229 
2230   PetscFunctionBegin;
2231   if (nghosts) { *nghosts = B->nbs;}
2232   if (ghosts) {*ghosts = baij->garray;}
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2240 /*
2241     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2242 */
2243 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2244 {
2245   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2246   PetscErrorCode        ierr;
2247   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2248   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2249   const PetscInt        *is;
2250   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2251   PetscInt              *rowhit,M,cstart,cend,colb;
2252   PetscInt              *columnsforrow,l;
2253   IS                    *isa;
2254   PetscBool              done,flg;
2255   ISLocalToGlobalMapping map = mat->bmapping;
2256   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2257 
2258   PetscFunctionBegin;
2259   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2260   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2261 
2262   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2263   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2264   M                = mat->rmap->n/bs;
2265   cstart           = mat->cmap->rstart/bs;
2266   cend             = mat->cmap->rend/bs;
2267   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2268   c->N             = mat->cmap->N/bs;
2269   c->m             = mat->rmap->n/bs;
2270   c->rstart        = mat->rmap->rstart/bs;
2271 
2272   c->ncolors       = nis;
2273   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2274   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2275   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2276   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2277   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2278   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2279 
2280   /* Allow access to data structures of local part of matrix */
2281   if (!baij->colmap) {
2282     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2283   }
2284   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2285   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2286 
2287   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2288   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2289 
2290   for (i=0; i<nis; i++) {
2291     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2292     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2293     c->ncolumns[i] = n;
2294     if (n) {
2295       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2296       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2297       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2298     } else {
2299       c->columns[i]  = 0;
2300     }
2301 
2302     if (ctype == IS_COLORING_GLOBAL){
2303       /* Determine the total (parallel) number of columns of this color */
2304       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2305       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2306 
2307       nn   = PetscMPIIntCast(n);
2308       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2309       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2310       if (!nctot) {
2311         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2312       }
2313 
2314       disp[0] = 0;
2315       for (j=1; j<size; j++) {
2316         disp[j] = disp[j-1] + ncolsonproc[j-1];
2317       }
2318 
2319       /* Get complete list of columns for color on each processor */
2320       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2321       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2322       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2323     } else if (ctype == IS_COLORING_GHOSTED){
2324       /* Determine local number of columns of this color on this process, including ghost points */
2325       nctot = n;
2326       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2327       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2328     } else {
2329       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2330     }
2331 
2332     /*
2333        Mark all rows affect by these columns
2334     */
2335     /* Temporary option to allow for debugging/testing */
2336     flg  = PETSC_FALSE;
2337     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2338     if (!flg) {/*-----------------------------------------------------------------------------*/
2339       /* crude, fast version */
2340       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2341       /* loop over columns*/
2342       for (j=0; j<nctot; j++) {
2343         if (ctype == IS_COLORING_GHOSTED) {
2344           col = ltog[cols[j]];
2345         } else {
2346           col  = cols[j];
2347         }
2348         if (col >= cstart && col < cend) {
2349           /* column is in diagonal block of matrix */
2350           rows = A_cj + A_ci[col-cstart];
2351           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2352         } else {
2353 #if defined (PETSC_USE_CTABLE)
2354           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2355 	  colb --;
2356 #else
2357           colb = baij->colmap[col] - 1;
2358 #endif
2359           if (colb == -1) {
2360             m = 0;
2361           } else {
2362             colb = colb/bs;
2363             rows = B_cj + B_ci[colb];
2364             m    = B_ci[colb+1] - B_ci[colb];
2365           }
2366         }
2367         /* loop over columns marking them in rowhit */
2368         for (k=0; k<m; k++) {
2369           rowhit[*rows++] = col + 1;
2370         }
2371       }
2372 
2373       /* count the number of hits */
2374       nrows = 0;
2375       for (j=0; j<M; j++) {
2376         if (rowhit[j]) nrows++;
2377       }
2378       c->nrows[i]         = nrows;
2379       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2380       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2381       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2382       nrows = 0;
2383       for (j=0; j<M; j++) {
2384         if (rowhit[j]) {
2385           c->rows[i][nrows]           = j;
2386           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2387           nrows++;
2388         }
2389       }
2390     } else {/*-------------------------------------------------------------------------------*/
2391       /* slow version, using rowhit as a linked list */
2392       PetscInt currentcol,fm,mfm;
2393       rowhit[M] = M;
2394       nrows     = 0;
2395       /* loop over columns*/
2396       for (j=0; j<nctot; j++) {
2397         if (ctype == IS_COLORING_GHOSTED) {
2398           col = ltog[cols[j]];
2399         } else {
2400           col  = cols[j];
2401         }
2402         if (col >= cstart && col < cend) {
2403           /* column is in diagonal block of matrix */
2404           rows = A_cj + A_ci[col-cstart];
2405           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2406         } else {
2407 #if defined (PETSC_USE_CTABLE)
2408           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2409           colb --;
2410 #else
2411           colb = baij->colmap[col] - 1;
2412 #endif
2413           if (colb == -1) {
2414             m = 0;
2415           } else {
2416             colb = colb/bs;
2417             rows = B_cj + B_ci[colb];
2418             m    = B_ci[colb+1] - B_ci[colb];
2419           }
2420         }
2421 
2422         /* loop over columns marking them in rowhit */
2423         fm    = M; /* fm points to first entry in linked list */
2424         for (k=0; k<m; k++) {
2425           currentcol = *rows++;
2426 	  /* is it already in the list? */
2427           do {
2428             mfm  = fm;
2429             fm   = rowhit[fm];
2430           } while (fm < currentcol);
2431           /* not in list so add it */
2432           if (fm != currentcol) {
2433             nrows++;
2434             columnsforrow[currentcol] = col;
2435             /* next three lines insert new entry into linked list */
2436             rowhit[mfm]               = currentcol;
2437             rowhit[currentcol]        = fm;
2438             fm                        = currentcol;
2439             /* fm points to present position in list since we know the columns are sorted */
2440           } else {
2441             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2442           }
2443         }
2444       }
2445       c->nrows[i]         = nrows;
2446       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2447       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2448       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2449       /* now store the linked list of rows into c->rows[i] */
2450       nrows = 0;
2451       fm    = rowhit[M];
2452       do {
2453         c->rows[i][nrows]            = fm;
2454         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2455         fm                           = rowhit[fm];
2456       } while (fm < M);
2457     } /* ---------------------------------------------------------------------------------------*/
2458     ierr = PetscFree(cols);CHKERRQ(ierr);
2459   }
2460 
2461   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2462   /*
2463        vscale will contain the "diagonal" on processor scalings followed by the off processor
2464   */
2465   if (ctype == IS_COLORING_GLOBAL) {
2466     PetscInt *garray;
2467     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2468     for (i=0; i<baij->B->cmap->n/bs; i++) {
2469       for (j=0; j<bs; j++) {
2470         garray[i*bs+j] = bs*baij->garray[i]+j;
2471       }
2472     }
2473     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2474     ierr = PetscFree(garray);CHKERRQ(ierr);
2475     CHKMEMQ;
2476     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2477     for (k=0; k<c->ncolors; k++) {
2478       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2479       for (l=0; l<c->nrows[k]; l++) {
2480         col = c->columnsforrow[k][l];
2481         if (col >= cstart && col < cend) {
2482           /* column is in diagonal block of matrix */
2483           colb = col - cstart;
2484         } else {
2485           /* column  is in "off-processor" part */
2486 #if defined (PETSC_USE_CTABLE)
2487           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2488           colb --;
2489 #else
2490           colb = baij->colmap[col] - 1;
2491 #endif
2492           colb = colb/bs;
2493           colb += cend - cstart;
2494         }
2495         c->vscaleforrow[k][l] = colb;
2496       }
2497     }
2498   } else if (ctype == IS_COLORING_GHOSTED) {
2499     /* Get gtol mapping */
2500     PetscInt N = mat->cmap->N, *gtol;
2501     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2502     for (i=0; i<N; i++) gtol[i] = -1;
2503     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2504 
2505     c->vscale = 0; /* will be created in MatFDColoringApply() */
2506     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2507     for (k=0; k<c->ncolors; k++) {
2508       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2509       for (l=0; l<c->nrows[k]; l++) {
2510         col = c->columnsforrow[k][l];      /* global column index */
2511         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2512       }
2513     }
2514     ierr = PetscFree(gtol);CHKERRQ(ierr);
2515   }
2516   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2517 
2518   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2519   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2520   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2521   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2522     CHKMEMQ;
2523   PetscFunctionReturn(0);
2524 }
2525 
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
/*
   MatGetSeqNonzerostructure_MPIBAIJ - Builds, on every process, a sequential
   AIJ matrix (block-row by block-column, numerical values not set) containing
   the complete nonzero block structure of the parallel BAIJ matrix A.

   Input Parameter:
.  A - the MPIBAIJ matrix

   Output Parameter:
.  newmat - sequential matrix with the nonzero structure of the entire matrix;
            the caller is responsible for destroying it

   Collective: performs two MPI_Allgatherv operations over A's communicator.
*/
PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of nonzero blocks in global block row i; each process
     fills its own rows (diagonal part ad plus off-diagonal part bd) and the
     Allgatherv below completes the array */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  /* one allocation holds both recvcounts and displs (displs = second half) */
  ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs     = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write directly into B's column-index array at the offset of this
     process's first row, merging ad (diagonal block, local numbering shifted
     by rstart) and bd (off-diagonal block, translated via garray) so each
     row's columns come out in ascending global order */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recvcounts/displs are reused, now measured in nonzeros instead of rows */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0]  = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from A so downstream code can exploit it */
  if (A->symmetric){
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatSOR_MPIBAIJ"
2649 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2650 {
2651   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2652   PetscErrorCode ierr;
2653   Vec            bb1 = 0;
2654 
2655   PetscFunctionBegin;
2656   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2657     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2658   }
2659 
2660   if (flag == SOR_APPLY_UPPER) {
2661     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2662     PetscFunctionReturn(0);
2663   }
2664 
2665   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2666     if (flag & SOR_ZERO_INITIAL_GUESS) {
2667       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2668       its--;
2669     }
2670 
2671     while (its--) {
2672       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2673       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2674 
2675       /* update rhs: bb1 = bb - B*x */
2676       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2677       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2678 
2679       /* local sweep */
2680       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2681     }
2682   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2683     if (flag & SOR_ZERO_INITIAL_GUESS) {
2684       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2685       its--;
2686     }
2687     while (its--) {
2688       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690 
2691       /* update rhs: bb1 = bb - B*x */
2692       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2693       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2694 
2695       /* local sweep */
2696       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2697     }
2698   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2699     if (flag & SOR_ZERO_INITIAL_GUESS) {
2700       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2701       its--;
2702     }
2703     while (its--) {
2704       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2705       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706 
2707       /* update rhs: bb1 = bb - B*x */
2708       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2709       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2710 
2711       /* local sweep */
2712       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2713     }
2714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2715 
2716   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2721 
2722 
/* -------------------------------------------------------------------*/
/*
   Function table for the MPIBAIJ matrix type.  Entries are positional: the
   slot-number comments (4, 10, 15, ...) index into struct _MatOps, and a 0
   means the operation is not supported for this type.  The order must match
   the declaration of struct _MatOps exactly -- do not reorder entries.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIBAIJ,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*24*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIBAIJ,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       MatCopy_MPIBAIJ,
/*44*/ 0,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIBAIJ,
       0,
       MatSetUnfactored_MPIBAIJ,
       MatPermute_MPIBAIJ,
       MatSetValuesBlocked_MPIBAIJ,
/*59*/ MatGetSubMatrix_MPIBAIJ,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       MatFDColoringApply_BAIJ,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIBAIJ,
       0,
       MatGetGhosts_MPIBAIJ,
       0,
       0,
/*119*/0,
       0,
       0,
       0
};
2849 
2850 EXTERN_C_BEGIN
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2854 {
2855   PetscFunctionBegin;
2856   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2857   *iscopy = PETSC_FALSE;
2858   PetscFunctionReturn(0);
2859 }
2860 EXTERN_C_END
2861 
2862 EXTERN_C_BEGIN
2863 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2864 EXTERN_C_END
2865 
2866 EXTERN_C_BEGIN
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2869 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2870 {
2871   PetscInt       m,rstart,cstart,cend;
2872   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2873   const PetscInt *JJ=0;
2874   PetscScalar    *values=0;
2875   PetscErrorCode ierr;
2876 
2877   PetscFunctionBegin;
2878 
2879   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2880   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2881   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2882   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2883   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2884   m      = B->rmap->n/bs;
2885   rstart = B->rmap->rstart/bs;
2886   cstart = B->cmap->rstart/bs;
2887   cend   = B->cmap->rend/bs;
2888 
2889   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2890   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2891   for (i=0; i<m; i++) {
2892     nz = ii[i+1] - ii[i];
2893     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2894     nz_max = PetscMax(nz_max,nz);
2895     JJ  = jj + ii[i];
2896     for (j=0; j<nz; j++) {
2897       if (*JJ >= cstart) break;
2898       JJ++;
2899     }
2900     d = 0;
2901     for (; j<nz; j++) {
2902       if (*JJ++ >= cend) break;
2903       d++;
2904     }
2905     d_nnz[i] = d;
2906     o_nnz[i] = nz - d;
2907   }
2908   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2909   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2910 
2911   values = (PetscScalar*)V;
2912   if (!values) {
2913     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2914     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2915   }
2916   for (i=0; i<m; i++) {
2917     PetscInt          row    = i + rstart;
2918     PetscInt          ncols  = ii[i+1] - ii[i];
2919     const PetscInt    *icols = jj + ii[i];
2920     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2921     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2922   }
2923 
2924   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2925   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2926   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2927 
2928   PetscFunctionReturn(0);
2929 }
2930 EXTERN_C_END
2931 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
   If the matrix type has no "MatMPIBAIJSetPreallocationCSR_C" implementation
   registered this routine silently does nothing.

.keywords: matrix, baij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation attached to B (no-op if none) */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2964 
2965 EXTERN_C_BEGIN
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2968 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2969 {
2970   Mat_MPIBAIJ    *b;
2971   PetscErrorCode ierr;
2972   PetscInt       i, newbs = PetscAbs(bs);
2973 
2974   PetscFunctionBegin;
2975   if (bs < 0) {
2976     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2977       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2978     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2979     bs   = PetscAbs(bs);
2980   }
2981   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2982   bs = newbs;
2983 
2984 
2985   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2986   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2987   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2988   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2989   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2990 
2991   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2992   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2993   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2994   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2995 
2996   if (d_nnz) {
2997     for (i=0; i<B->rmap->n/bs; i++) {
2998       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2999     }
3000   }
3001   if (o_nnz) {
3002     for (i=0; i<B->rmap->n/bs; i++) {
3003       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3004     }
3005   }
3006 
3007   b = (Mat_MPIBAIJ*)B->data;
3008   b->bs2 = bs*bs;
3009   b->mbs = B->rmap->n/bs;
3010   b->nbs = B->cmap->n/bs;
3011   b->Mbs = B->rmap->N/bs;
3012   b->Nbs = B->cmap->N/bs;
3013 
3014   for (i=0; i<=b->size; i++) {
3015     b->rangebs[i] = B->rmap->range[i]/bs;
3016   }
3017   b->rstartbs = B->rmap->rstart/bs;
3018   b->rendbs   = B->rmap->rend/bs;
3019   b->cstartbs = B->cmap->rstart/bs;
3020   b->cendbs   = B->cmap->rend/bs;
3021 
3022   if (!B->preallocated) {
3023     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3024     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3025     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3026     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3027     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3028     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3029     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3030     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3031     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3032   }
3033 
3034   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3035   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3036   B->preallocated = PETSC_TRUE;
3037   PetscFunctionReturn(0);
3038 }
3039 EXTERN_C_END
3040 
3041 EXTERN_C_BEGIN
3042 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3043 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3044 EXTERN_C_END
3045 
3046 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
/*
   MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj (adjacency graph) matrix from
   the block nonzero structure of the MPIBAIJ matrix B: one graph row per
   block row, with the diagonal entry removed.  Ownership of the ii/jj arrays
   is passed to the MPIAdj matrix.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointers ii[], counting diagonal-block plus
     off-diagonal-block entries per block row */
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  CHKMEMQ;
  }
  /* second pass: fill jj[] with global block-column indices per row --
     off-diagonal columns before the diagonal block, then the diagonal block
     (skipping the diagonal entry itself), then the remaining off-diagonal
     columns.  NOTE(review): the split test compares garray[] against rstart
     rather than rstart+i; this appears to rely on garray holding only
     columns outside the diagonal block -- confirm for non-square layouts. */
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
  CHKMEMQ;
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
  CHKMEMQ;
      }
    }
    for (;j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
  CHKMEMQ;
    }
  }
  /* MPIAdj takes ownership of ii and jj; they must not be freed here */
  ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
3095 
3096 EXTERN_C_BEGIN
3097 #if defined(PETSC_HAVE_MUMPS)
3098 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3099 #endif
3100 EXTERN_C_END
3101 
3102 /*MC
3103    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3104 
3105    Options Database Keys:
3106 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3107 . -mat_block_size <bs> - set the blocksize used to store the matrix
3108 - -mat_use_hash_table <fact>
3109 
3110   Level: beginner
3111 
3112 .seealso: MatCreateMPIBAIJ
3113 M*/
3114 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Type constructor for MATMPIBAIJ.

   Installs the MPIBAIJ operation table, allocates and zero-initializes the
   per-type data structure, processes the -mat_use_hash_table option, and
   registers the composed (queryable-by-name) type-specific functions.
   Preallocation of the actual storage happens later, in
   MatMPIBAIJSetPreallocation().
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MPIBAIJ implementations of the generic Mat operations */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships (filled during preallocation) */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* -mat_use_hash_table is queried twice on purpose: first as a flag, then
     (if given) for its numerical value, which becomes the table size factor */
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) {
      PetscReal fact = 1.39;
      ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
      /* a factor <= 1.0 would make the table no larger than the data; use the default instead */
      if (fact <= 1.0) fact = 1.39;
      ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
      ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* register type-specific implementations that other code looks up by name */
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
                                     "MatConvert_MPIBAIJ_MPIAdj",
                                      MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
3211 
3212 /*MC
3213    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3214 
3215    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3216    and MATMPIBAIJ otherwise.
3217 
3218    Options Database Keys:
3219 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3220 
3221   Level: beginner
3222 
3223 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3224 M*/
3225 
3226 EXTERN_C_BEGIN
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatCreate_BAIJ"
3229 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3230 {
3231   PetscErrorCode ierr;
3232   PetscMPIInt    size;
3233 
3234   PetscFunctionBegin;
3235   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3236   if (size == 1) {
3237     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3238   } else {
3239     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3240   }
3241   PetscFunctionReturn(0);
3242 }
3243 EXTERN_C_END
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3247 /*@C
3248    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3249    (block compressed row).  For good matrix assembly performance
3250    the user should preallocate the matrix storage by setting the parameters
3251    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3252    performance can be increased by more than a factor of 50.
3253 
3254    Collective on Mat
3255 
3256    Input Parameters:
3257 +  A - the matrix
.  bs   - size of block
3259 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3260            submatrix  (same for all local rows)
3261 .  d_nnz - array containing the number of block nonzeros in the various block rows
3262            of the in diagonal portion of the local (possibly different for each block
3263            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3264 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3265            submatrix (same for all local rows).
3266 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3267            off-diagonal portion of the local submatrix (possibly different for
3268            each block row) or PETSC_NULL.
3269 
3270    If the *_nnz parameter is given then the *_nz parameter is ignored
3271 
3272    Options Database Keys:
3273 +   -mat_block_size - size of the blocks to use
3274 -   -mat_use_hash_table <fact>
3275 
3276    Notes:
3277    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3279 
3280    Storage Information:
3281    For a square global matrix we define each processor's diagonal portion
3282    to be its local rows and the corresponding columns (a square submatrix);
3283    each processor's off-diagonal portion encompasses the remainder of the
3284    local matrix (a rectangular submatrix).
3285 
3286    The user can specify preallocated storage for the diagonal part of
3287    the local submatrix with either d_nz or d_nnz (not both).  Set
3288    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3289    memory allocation.  Likewise, specify preallocated storage for the
3290    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3291 
3292    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3293    the figure below we depict these three local rows and all columns (0-11).
3294 
3295 .vb
3296            0 1 2 3 4 5 6 7 8 9 10 11
3297           -------------------
3298    row 3  |  o o o d d d o o o o o o
3299    row 4  |  o o o d d d o o o o o o
3300    row 5  |  o o o d d d o o o o o o
3301           -------------------
3302 .ve
3303 
3304    Thus, any entries in the d locations are stored in the d (diagonal)
3305    submatrix, and any entries in the o locations are stored in the
3306    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3307    stored simply in the MATSEQBAIJ format for compressed row storage.
3308 
3309    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3310    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3311    In general, for PDE problems in which most nonzeros are near the diagonal,
3312    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3313    or you will get TERRIBLE performance; see the users' manual chapter on
3314    matrices.
3315 
3316    You can call MatGetInfo() to get information on how effective the preallocation was;
3317    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3318    You can also run with the option -info and look for messages with the string
3319    malloc in them to see if additional memory allocation was needed.
3320 
3321    Level: intermediate
3322 
3323 .keywords: matrix, block, aij, compressed row, sparse, parallel
3324 
3325 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3326 @*/
3327 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3328 {
3329   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3330 
3331   PetscFunctionBegin;
3332   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3333   if (f) {
3334     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3335   }
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 #undef __FUNCT__
3340 #define __FUNCT__ "MatCreateMPIBAIJ"
3341 /*@C
3342    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3343    (block compressed row).  For good matrix assembly performance
3344    the user should preallocate the matrix storage by setting the parameters
3345    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3346    performance can be increased by more than a factor of 50.
3347 
3348    Collective on MPI_Comm
3349 
3350    Input Parameters:
3351 +  comm - MPI communicator
.  bs   - size of block
3353 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3354            This value should be the same as the local size used in creating the
3355            y vector for the matrix-vector product y = Ax.
3356 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3357            This value should be the same as the local size used in creating the
3358            x vector for the matrix-vector product y = Ax.
3359 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3360 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3361 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3362            submatrix  (same for all local rows)
3363 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3364            of the in diagonal portion of the local (possibly different for each block
3365            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3366 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3367            submatrix (same for all local rows).
3368 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3369            off-diagonal portion of the local submatrix (possibly different for
3370            each block row) or PETSC_NULL.
3371 
3372    Output Parameter:
3373 .  A - the matrix
3374 
3375    Options Database Keys:
3376 +   -mat_block_size - size of the blocks to use
3377 -   -mat_use_hash_table <fact>
3378 
3379    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3381    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3382 
3383    Notes:
3384    If the *_nnz parameter is given then the *_nz parameter is ignored
3385 
   A nonzero block is any block that has one or more nonzeros in it
3387 
3388    The user MUST specify either the local or global matrix dimensions
3389    (possibly both).
3390 
3391    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3393 
3394    Storage Information:
3395    For a square global matrix we define each processor's diagonal portion
3396    to be its local rows and the corresponding columns (a square submatrix);
3397    each processor's off-diagonal portion encompasses the remainder of the
3398    local matrix (a rectangular submatrix).
3399 
3400    The user can specify preallocated storage for the diagonal part of
3401    the local submatrix with either d_nz or d_nnz (not both).  Set
3402    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3403    memory allocation.  Likewise, specify preallocated storage for the
3404    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3405 
3406    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3407    the figure below we depict these three local rows and all columns (0-11).
3408 
3409 .vb
3410            0 1 2 3 4 5 6 7 8 9 10 11
3411           -------------------
3412    row 3  |  o o o d d d o o o o o o
3413    row 4  |  o o o d d d o o o o o o
3414    row 5  |  o o o d d d o o o o o o
3415           -------------------
3416 .ve
3417 
3418    Thus, any entries in the d locations are stored in the d (diagonal)
3419    submatrix, and any entries in the o locations are stored in the
3420    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3421    stored simply in the MATSEQBAIJ format for compressed row storage.
3422 
3423    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3424    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3425    In general, for PDE problems in which most nonzeros are near the diagonal,
3426    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3427    or you will get TERRIBLE performance; see the users' manual chapter on
3428    matrices.
3429 
3430    Level: intermediate
3431 
3432 .keywords: matrix, block, aij, compressed row, sparse, parallel
3433 
3434 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3435 @*/
3436 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3437 {
3438   PetscErrorCode ierr;
3439   PetscMPIInt    size;
3440 
3441   PetscFunctionBegin;
3442   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3443   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3444   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3445   if (size > 1) {
3446     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3447     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3448   } else {
3449     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3450     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3451   }
3452   PetscFunctionReturn(0);
3453 }
3454 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix: creates a new matrix
   of the same type/sizes, copies every scalar field and table of the
   Mat_MPIBAIJ structure, and duplicates the two sequential pieces (A and B).
   cpvalues is forwarded to MatDuplicate() on A and B and so decides whether
   the numerical values are copied.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping of the implementation structure */
  a      = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs  = matin->rmap->bs;
  a->bs2   = oldmat->bs2;
  a->mbs   = oldmat->mbs;
  a->nbs   = oldmat->nbs;
  a->Mbs   = oldmat->Mbs;
  a->Nbs   = oldmat->Nbs;

  /* NOTE(review): this replaces the layouts already installed by MatSetSizes()
     above with copies of matin's -- presumably PetscLayoutCopy disposes of or
     tolerates the existing ones; confirm no layout reference is leaked here */
  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state starts out empty in the copy */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration, reset the statistics/storage */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* copy the global-to-local column map; its representation depends on PETSC_USE_CTABLE */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the local-to-global map of the off-diagonal block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh block stash; duplicate the matrix-vector multiply work vector and scatter */
  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  /* duplicate the diagonal and off-diagonal sequential pieces (values per cpvalues) */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
3541 
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatLoad_MPIBAIJ"
3544 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3545 {
3546   PetscErrorCode ierr;
3547   int            fd;
3548   PetscInt       i,nz,j,rstart,rend;
3549   PetscScalar    *vals,*buf;
3550   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3551   MPI_Status     status;
3552   PetscMPIInt    rank,size,maxnz;
3553   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3554   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3555   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3556   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3557   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3558   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3559 
3560   PetscFunctionBegin;
3561   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3562     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3563   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3564 
3565   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3566   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3567   if (!rank) {
3568     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3569     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3570     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3571   }
3572 
3573   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3574 
3575   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3576   M = header[1]; N = header[2];
3577 
3578   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3579   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3580   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3581 
3582   /* If global sizes are set, check if they are consistent with that given in the file */
3583   if (sizesset) {
3584     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3585   }
3586   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3587   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3588 
3589   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3590 
3591   /*
3592      This code adds extra rows to make sure the number of rows is
3593      divisible by the blocksize
3594   */
3595   Mbs        = M/bs;
3596   extra_rows = bs - M + bs*Mbs;
3597   if (extra_rows == bs) extra_rows = 0;
3598   else                  Mbs++;
3599   if (extra_rows && !rank) {
3600     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3601   }
3602 
3603   /* determine ownership of all rows */
3604   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3605     mbs        = Mbs/size + ((Mbs % size) > rank);
3606     m          = mbs*bs;
3607   } else { /* User set */
3608     m          = newmat->rmap->n;
3609     mbs        = m/bs;
3610   }
3611   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3612   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3613 
3614   /* process 0 needs enough room for process with most rows */
3615   if (!rank) {
3616     mmax = rowners[1];
3617     for (i=2; i<size; i++) {
3618       mmax = PetscMax(mmax,rowners[i]);
3619     }
3620     mmax*=bs;
3621   } else mmax = m;
3622 
3623   rowners[0] = 0;
3624   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3625   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3626   rstart = rowners[rank];
3627   rend   = rowners[rank+1];
3628 
3629   /* distribute row lengths to all processors */
3630   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3631   if (!rank) {
3632     mend = m;
3633     if (size == 1) mend = mend - extra_rows;
3634     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3635     for (j=mend; j<m; j++) locrowlens[j] = 1;
3636     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3637     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3638     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3639     for (j=0; j<m; j++) {
3640       procsnz[0] += locrowlens[j];
3641     }
3642     for (i=1; i<size; i++) {
3643       mend = browners[i+1] - browners[i];
3644       if (i == size-1) mend = mend - extra_rows;
3645       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3646       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3647       /* calculate the number of nonzeros on each processor */
3648       for (j=0; j<browners[i+1]-browners[i]; j++) {
3649         procsnz[i] += rowlengths[j];
3650       }
3651       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3652     }
3653     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3654   } else {
3655     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3656   }
3657 
3658   if (!rank) {
3659     /* determine max buffer needed and allocate it */
3660     maxnz = procsnz[0];
3661     for (i=1; i<size; i++) {
3662       maxnz = PetscMax(maxnz,procsnz[i]);
3663     }
3664     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3665 
3666     /* read in my part of the matrix column indices  */
3667     nz     = procsnz[0];
3668     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3669     mycols = ibuf;
3670     if (size == 1)  nz -= extra_rows;
3671     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3672     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3673 
3674     /* read in every ones (except the last) and ship off */
3675     for (i=1; i<size-1; i++) {
3676       nz   = procsnz[i];
3677       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3678       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3679     }
3680     /* read in the stuff for the last proc */
3681     if (size != 1) {
3682       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3683       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3684       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3685       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3686     }
3687     ierr = PetscFree(cols);CHKERRQ(ierr);
3688   } else {
3689     /* determine buffer space needed for message */
3690     nz = 0;
3691     for (i=0; i<m; i++) {
3692       nz += locrowlens[i];
3693     }
3694     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3695     mycols = ibuf;
3696     /* receive message of column indices*/
3697     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3698     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3699     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3700   }
3701 
3702   /* loop over local rows, determining number of off diagonal entries */
3703   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3704   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3705   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3706   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3707   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3708   rowcount = 0; nzcount = 0;
3709   for (i=0; i<mbs; i++) {
3710     dcount  = 0;
3711     odcount = 0;
3712     for (j=0; j<bs; j++) {
3713       kmax = locrowlens[rowcount];
3714       for (k=0; k<kmax; k++) {
3715         tmp = mycols[nzcount++]/bs;
3716         if (!mask[tmp]) {
3717           mask[tmp] = 1;
3718           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3719           else masked1[dcount++] = tmp;
3720         }
3721       }
3722       rowcount++;
3723     }
3724 
3725     dlens[i]  = dcount;
3726     odlens[i] = odcount;
3727 
3728     /* zero out the mask elements we set */
3729     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3730     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3731   }
3732 
3733 
3734   if (!sizesset) {
3735     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3736   }
3737   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3738 
3739   if (!rank) {
3740     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3741     /* read in my part of the matrix numerical values  */
3742     nz = procsnz[0];
3743     vals = buf;
3744     mycols = ibuf;
3745     if (size == 1)  nz -= extra_rows;
3746     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3747     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3748 
3749     /* insert into matrix */
3750     jj      = rstart*bs;
3751     for (i=0; i<m; i++) {
3752       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3753       mycols += locrowlens[i];
3754       vals   += locrowlens[i];
3755       jj++;
3756     }
3757     /* read in other processors (except the last one) and ship out */
3758     for (i=1; i<size-1; i++) {
3759       nz   = procsnz[i];
3760       vals = buf;
3761       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3762       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3763     }
3764     /* the last proc */
3765     if (size != 1){
3766       nz   = procsnz[i] - extra_rows;
3767       vals = buf;
3768       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3769       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3770       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3771     }
3772     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3773   } else {
3774     /* receive numeric values */
3775     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3776 
3777     /* receive message of values*/
3778     vals   = buf;
3779     mycols = ibuf;
3780     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3781     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3782     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3783 
3784     /* insert into matrix */
3785     jj      = rstart*bs;
3786     for (i=0; i<m; i++) {
3787       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3788       mycols += locrowlens[i];
3789       vals   += locrowlens[i];
3790       jj++;
3791     }
3792   }
3793   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3794   ierr = PetscFree(buf);CHKERRQ(ierr);
3795   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3796   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3797   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3798   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3799   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3800   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3801 
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3807 /*@
3808    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3809 
3810    Input Parameters:
3811 .  mat  - the matrix
3812 .  fact - factor
3813 
3814    Not Collective, each process can use a different factor
3815 
3816    Level: advanced
3817 
3818   Notes:
3819    This can also be set by the command line option: -mat_use_hash_table <fact>
3820 
3821 .keywords: matrix, hashtable, factor, HT
3822 
3823 .seealso: MatSetOption()
3824 @*/
3825 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3826 {
3827   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3828 
3829   PetscFunctionBegin;
3830   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3831   if (f) {
3832     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3833   }
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 EXTERN_C_BEGIN
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3840 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3841 {
3842   Mat_MPIBAIJ *baij;
3843 
3844   PetscFunctionBegin;
3845   baij = (Mat_MPIBAIJ*)mat->data;
3846   baij->ht_fact = fact;
3847   PetscFunctionReturn(0);
3848 }
3849 EXTERN_C_END
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3854 {
3855   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3856   PetscFunctionBegin;
3857   *Ad     = a->A;
3858   *Ao     = a->B;
3859   *colmap = a->garray;
3860   PetscFunctionReturn(0);
3861 }
3862 
3863 /*
3864     Special version for direct calls from Fortran (to eliminate two function call overheads
3865 */
3866 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3867 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3868 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3869 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3870 #endif
3871 
3872 #undef __FUNCT__
3873 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3874 /*@C
3875   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3876 
3877   Collective on Mat
3878 
3879   Input Parameters:
3880 + mat - the matrix
3881 . min - number of input rows
3882 . im - input rows
3883 . nin - number of input columns
3884 . in - input columns
3885 . v - numerical values input
3886 - addvin - INSERT_VALUES or ADD_VALUES
3887 
3888   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3889 
3890   Level: advanced
3891 
3892 .seealso:   MatSetValuesBlocked()
3893 @*/
3894 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3895 {
3896   /* convert input arguments to C version */
3897   Mat             mat = *matin;
3898   PetscInt        m = *min, n = *nin;
3899   InsertMode      addv = *addvin;
3900 
3901   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3902   const MatScalar *value;
3903   MatScalar       *barray=baij->barray;
3904   PetscBool       roworiented = baij->roworiented;
3905   PetscErrorCode  ierr;
3906   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3907   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3908   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3909 
3910   PetscFunctionBegin;
3911   /* tasks normally handled by MatSetValuesBlocked() */
3912   if (mat->insertmode == NOT_SET_VALUES) {
3913     mat->insertmode = addv;
3914   }
3915 #if defined(PETSC_USE_DEBUG)
3916   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3917   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3918 #endif
3919   if (mat->assembled) {
3920     mat->was_assembled = PETSC_TRUE;
3921     mat->assembled     = PETSC_FALSE;
3922   }
3923   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3924 
3925 
3926   if(!barray) {
3927     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3928     baij->barray = barray;
3929   }
3930 
3931   if (roworiented) {
3932     stepval = (n-1)*bs;
3933   } else {
3934     stepval = (m-1)*bs;
3935   }
3936   for (i=0; i<m; i++) {
3937     if (im[i] < 0) continue;
3938 #if defined(PETSC_USE_DEBUG)
3939     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3940 #endif
3941     if (im[i] >= rstart && im[i] < rend) {
3942       row = im[i] - rstart;
3943       for (j=0; j<n; j++) {
3944         /* If NumCol = 1 then a copy is not required */
3945         if ((roworiented) && (n == 1)) {
3946           barray = (MatScalar*)v + i*bs2;
3947         } else if((!roworiented) && (m == 1)) {
3948           barray = (MatScalar*)v + j*bs2;
3949         } else { /* Here a copy is required */
3950           if (roworiented) {
3951             value = v + i*(stepval+bs)*bs + j*bs;
3952           } else {
3953             value = v + j*(stepval+bs)*bs + i*bs;
3954           }
3955           for (ii=0; ii<bs; ii++,value+=stepval) {
3956             for (jj=0; jj<bs; jj++) {
3957               *barray++  = *value++;
3958             }
3959           }
3960           barray -=bs2;
3961         }
3962 
3963         if (in[j] >= cstart && in[j] < cend){
3964           col  = in[j] - cstart;
3965           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3966         }
3967         else if (in[j] < 0) continue;
3968 #if defined(PETSC_USE_DEBUG)
3969         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3970 #endif
3971         else {
3972           if (mat->was_assembled) {
3973             if (!baij->colmap) {
3974               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3975             }
3976 
3977 #if defined(PETSC_USE_DEBUG)
3978 #if defined (PETSC_USE_CTABLE)
3979             { PetscInt data;
3980               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3981               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3982             }
3983 #else
3984             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3985 #endif
3986 #endif
3987 #if defined (PETSC_USE_CTABLE)
3988 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3989             col  = (col - 1)/bs;
3990 #else
3991             col = (baij->colmap[in[j]] - 1)/bs;
3992 #endif
3993             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3994               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3995               col =  in[j];
3996             }
3997           }
3998           else col = in[j];
3999           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4000         }
4001       }
4002     } else {
4003       if (!baij->donotstash) {
4004         if (roworiented) {
4005           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4006         } else {
4007           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4008         }
4009       }
4010     }
4011   }
4012 
4013   /* task normally handled by MatSetValuesBlocked() */
4014   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 #undef __FUNCT__
4019 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4020 /*@
4021      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4022          CSR format the local rows.
4023 
4024    Collective on MPI_Comm
4025 
4026    Input Parameters:
4027 +  comm - MPI communicator
4028 .  bs - the block size, only a block size of 1 is supported
4029 .  m - number of local rows (Cannot be PETSC_DECIDE)
4030 .  n - This value should be the same as the local size used in creating the
4031        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4032        calculated if N is given) For square matrices n is almost always m.
4033 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4034 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4035 .   i - row indices
4036 .   j - column indices
4037 -   a - matrix values
4038 
4039    Output Parameter:
4040 .   mat - the matrix
4041 
4042    Level: intermediate
4043 
4044    Notes:
4045        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4046      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4047      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4048 
4049        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4050 
4051 .keywords: matrix, aij, compressed row, sparse, parallel
4052 
4053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4054           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4055 @*/
4056 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4057 {
4058   PetscErrorCode ierr;
4059 
4060 
4061  PetscFunctionBegin;
4062   if (i[0]) {
4063     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4064   }
4065   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4066   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4067   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4068   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4069   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4070   PetscFunctionReturn(0);
4071 }
4072