xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d61e7eb36d836a8e2bf1ad3f41344afcd0d5cf20)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
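/*
   A minimal usage sketch for the routine above, assuming mat is an assembled
   MPIBAIJ matrix with m local rows (the names mat, m, loc are illustrative only):

     Vec      rowmax;
     PetscInt *loc;

     ierr = MatGetVecs(mat,PETSC_NULL,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(mat,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(rowmax);CHKERRQ(ierr);

   Afterwards the i-th entry of rowmax holds the entry of largest absolute value
   in local row i and loc[i] its global column, combining the diagonal part
   (a->A) and the off-diagonal part (a->B) exactly as computed above.
*/
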
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non-scalable way since the
84    length of colmap equals the global number of block columns.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
109 
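/*
   Sketch of how colmap is consulted when an entry destined for the
   off-diagonal part B arrives (this mirrors the lookup in MatSetValues_MPIBAIJ
   below; gcol stands for a global column index and is illustrative only):

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
     lcol = lcol - 1;
   #else
     lcol = baij->colmap[gcol/bs] - 1;
   #endif

   Entries are stored shifted up by one so that a zeroed array slot (or a
   missing table entry, which PetscTableFind() reports as 0) yields lcol < 0,
   marking a global block column that does not occur in B.
*/
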
110 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
111 { \
112  \
113     brow = row/bs;  \
114     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
115     rmax = aimax[brow]; nrow = ailen[brow]; \
116       bcol = col/bs; \
117       ridx = row % bs; cidx = col % bs; \
118       low = 0; high = nrow; \
119       while (high-low > 3) { \
120         t = (low+high)/2; \
121         if (rp[t] > bcol) high = t; \
122         else              low  = t; \
123       } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > bcol) break; \
126         if (rp[_i] == bcol) { \
127           bap  = ap +  bs2*_i + bs*cidx + ridx; \
128           if (addv == ADD_VALUES) *bap += value;  \
129           else                    *bap  = value;  \
130           goto a_noinsert; \
131         } \
132       } \
133       if (a->nonew == 1) goto a_noinsert; \
134       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
136       N = nrow++ - 1;  \
137       /* shift up all the later entries in this row */ \
138       for (ii=N; ii>=_i; ii--) { \
139         rp[ii+1] = rp[ii]; \
140         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141       } \
142       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
143       rp[_i]                      = bcol;  \
144       ap[bs2*_i + bs*cidx + ridx] = value;  \
145       a_noinsert:; \
146     ailen[brow] = nrow; \
147 }
148 
149 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
150 { \
151     brow = row/bs;  \
152     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
153     rmax = bimax[brow]; nrow = bilen[brow]; \
154       bcol = col/bs; \
155       ridx = row % bs; cidx = col % bs; \
156       low = 0; high = nrow; \
157       while (high-low > 3) { \
158         t = (low+high)/2; \
159         if (rp[t] > bcol) high = t; \
160         else              low  = t; \
161       } \
162       for (_i=low; _i<high; _i++) { \
163         if (rp[_i] > bcol) break; \
164         if (rp[_i] == bcol) { \
165           bap  = ap +  bs2*_i + bs*cidx + ridx; \
166           if (addv == ADD_VALUES) *bap += value;  \
167           else                    *bap  = value;  \
168           goto b_noinsert; \
169         } \
170       } \
171       if (b->nonew == 1) goto b_noinsert; \
172       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
173       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
174       CHKMEMQ;\
175       N = nrow++ - 1;  \
176       /* shift up all the later entries in this row */ \
177       for (ii=N; ii>=_i; ii--) { \
178         rp[ii+1] = rp[ii]; \
179         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
180       } \
181       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
182       rp[_i]                      = bcol;  \
183       ap[bs2*_i + bs*cidx + ridx] = value;  \
184       b_noinsert:; \
185     bilen[brow] = nrow; \
186 }
187 
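/*
   Both macros above locate the block column bcol in the sorted list
   rp[0..nrow-1] with the same two-phase search: a short binary search narrows
   the window to at most three candidates, then a linear scan finds the match
   or the insertion point.  A standalone sketch of that pattern, written as a
   hypothetical helper rather than actual PETSc code:

     static PetscInt FindBlockColumn(const PetscInt rp[],PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] >= bcol) break;
       }
       return i;
     }

   On return, rp[i] == bcol means the block was found; otherwise i is where a
   new block column would have to be inserted, which is exactly what the
   shift-up loops in the macros do.
*/
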
188 #undef __FUNCT__
189 #define __FUNCT__ "MatSetValues_MPIBAIJ"
190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
191 {
192   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
193   MatScalar      value;
194   PetscBool      roworiented = baij->roworiented;
195   PetscErrorCode ierr;
196   PetscInt       i,j,row,col;
197   PetscInt       rstart_orig=mat->rmap->rstart;
198   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
199   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
200 
201   /* Some Variables required in the macro */
202   Mat            A = baij->A;
203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
204   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   MatScalar      *aa=a->a;
206 
207   Mat            B = baij->B;
208   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
209   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   MatScalar      *ba=b->a;
211 
212   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
214   MatScalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   if (v) PetscValidScalarPointer(v,6);
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
274 
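/*
   A typical call sequence for the routine above (a sketch; mat is an MPIBAIJ
   matrix whose layout has already been set):

     PetscInt    rows[2] = {0,1},cols[2] = {0,3};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};

     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   With the default row-oriented storage, vals is the dense 2x2 logical block
   {{1,2},{3,4}}.  Rows owned by another process are placed in mat->stash and
   communicated during assembly, which is why the two assembly calls are
   mandatory before the matrix can be used.
*/
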
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscBool         roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if (!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block in v is already contiguous and a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if ((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) {
320               barray[jj]  = value[jj];
321             }
322             barray += bs;
323           }
324           barray -= bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
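/*
   The blocked interface takes block indices and one dense bs*bs block of
   values per (block row, block column) pair.  A sketch for bs = 2, so block
   row 1 covers point rows 2 and 3:

     PetscInt    brow = 1,bcol = 0;
     PetscScalar block[4] = {1.0,2.0,
                             3.0,4.0};

     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,ADD_VALUES);CHKERRQ(ierr);

   The copy loop above repacks v into the contiguous work array baij->barray
   only when several blocks are passed at once; for a single block the data in
   v is used in place.
*/
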
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
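/*
   HASH() is multiplicative hashing with the golden-ratio constant: the key is
   scaled by HASH_KEY, the integer part is discarded, and the fractional part
   is mapped onto [0,size).  A worked example with size = 100 and key = 57:

     57*0.6180339887 = 35.2279...   ->  fractional part 0.2279...
     100*0.2279...   = 22.79...     ->  h1 = 22

   Keys below are built as (block row)*Nbs + (block column) + 1; the +1 keeps
   every key nonzero so that 0 can mark an empty slot in the table.
*/
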
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
413         /* Look up the entry in the hash table */
414         key = (row/bs)*Nbs+(col/bs)+1;
415         h1  = HASH(size,key,tmp);
416 
417 
418         idx = h1;
419 #if defined(PETSC_USE_DEBUG)
420         insert_ct++;
421         total_ct++;
422         if (HT[idx] != key) {
423           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
424           if (idx == size) {
425             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
426             if (idx == h1) {
427               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428             }
429           }
430         }
431 #else
432         if (HT[idx] != key) {
433           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
434           if (idx == size) {
435             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
436             if (idx == h1) {
437               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
438             }
439           }
440         }
441 #endif
442         /* A HASH table entry is found, so insert the values at the correct address */
443         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
444         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
445       }
446     } else {
447       if (!baij->donotstash) {
448         if (roworiented) {
449           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
450         } else {
451           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456 #if defined(PETSC_USE_DEBUG)
457   baij->ht_total_ct = total_ct;
458   baij->ht_insert_ct = insert_ct;
459 #endif
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscBool         roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up into the Hash Table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data --;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatNorm_MPIBAIJ"
627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
628 {
629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
630   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
633   PetscReal      sum = 0.0;
634   MatScalar      *v;
635 
636   PetscFunctionBegin;
637   if (baij->size == 1) {
638     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
639   } else {
640     if (type == NORM_FROBENIUS) {
641       v = amat->a;
642       nz = amat->nz*bs2;
643       for (i=0; i<nz; i++) {
644 #if defined(PETSC_USE_COMPLEX)
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646 #else
647         sum += (*v)*(*v); v++;
648 #endif
649       }
650       v = bmat->a;
651       nz = bmat->nz*bs2;
652       for (i=0; i<nz; i++) {
653 #if defined(PETSC_USE_COMPLEX)
654         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
660       *nrm = sqrt(*nrm);
661     } else if (type == NORM_1) { /* max column sum */
662       PetscReal *tmp,*tmp2;
663       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
664       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
665       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
666       v = amat->a; jj = amat->j;
667       for (i=0; i<amat->nz; i++) {
668         for (j=0; j<bs; j++){
669           col = bs*(cstart + *jj) + j; /* column index */
670           for (row=0; row<bs; row++){
671             tmp[col] += PetscAbsScalar(*v);  v++;
672           }
673         }
674         jj++;
675       }
676       v = bmat->a; jj = bmat->j;
677       for (i=0; i<bmat->nz; i++) {
678         for (j=0; j<bs; j++){
679           col = bs*garray[*jj] + j;
680           for (row=0; row<bs; row++){
681             tmp[col] += PetscAbsScalar(*v); v++;
682           }
683         }
684         jj++;
685       }
686       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
687       *nrm = 0.0;
688       for (j=0; j<mat->cmap->N; j++) {
689         if (tmp2[j] > *nrm) *nrm = tmp2[j];
690       }
691       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
692     } else if (type == NORM_INFINITY) { /* max row sum */
693       PetscReal *sums;
694       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
695       sum = 0.0;
696       for (j=0; j<amat->mbs; j++) {
697         for (row=0; row<bs; row++) sums[row] = 0.0;
698         v = amat->a + bs2*amat->i[j];
699         nz = amat->i[j+1]-amat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         v = bmat->a + bs2*bmat->i[j];
708         nz = bmat->i[j+1]-bmat->i[j];
709         for (i=0; i<nz; i++) {
710           for (col=0; col<bs; col++){
711             for (row=0; row<bs; row++){
712               sums[row] += PetscAbsScalar(*v); v++;
713             }
714           }
715         }
716         for (row=0; row<bs; row++){
717           if (sums[row] > sum) sum = sums[row];
718         }
719       }
720       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
721       ierr = PetscFree(sums);CHKERRQ(ierr);
722     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
723   }
724   PetscFunctionReturn(0);
725 }
726 
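/*
   A usage sketch for the norms computed above (collective on the matrix's
   communicator; the result is valid on every process):

     PetscReal nrm;

     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);

   NORM_FROBENIUS is the square root of the global sum of |a_ij|^2, NORM_1 the
   maximum column sum, and NORM_INFINITY the maximum row sum, exactly as the
   three branches above compute them.
*/
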
727 /*
728   Creates and fills the hash table.
729   This table is created only once.
730   If new entries need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash || mat->nooffprocentries) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processors are using the same insert mode (INSERT_VALUES or ADD_VALUES) */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
840   mat->insertmode = addv; /* in case this processor had no cache */
841 
842   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
843   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
854 {
855   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
856   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
857   PetscErrorCode ierr;
858   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
859   PetscInt       *row,*col;
860   PetscBool      r1,r2,r3,other_disassembled;
861   MatScalar      *val;
862   InsertMode     addv = mat->insertmode;
863   PetscMPIInt    n;
864 
865   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
866   PetscFunctionBegin;
867   if (!baij->donotstash && !mat->nooffprocentries) {
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         /* Now assemble all these values with a single function call */
878         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
879         i = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
883     /* Now process the block-stash. Since the values are stashed column-oriented,
884        set the roworiented flag to column oriented, and after MatSetValues()
885        restore the original flags */
886     r1 = baij->roworiented;
887     r2 = a->roworiented;
888     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
889     baij->roworiented = PETSC_FALSE;
890     a->roworiented    = PETSC_FALSE;
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
899         if (j < n) ncols = j-i;
900         else       ncols = n-i;
901         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
902         i = j;
903       }
904     }
905     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
906     baij->roworiented = r1;
907     a->roworiented    = r2;
908     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
909   }
910 
911   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
913 
914   /* determine if any processor has disassembled, if so we must
915      also disassemble ourselves, in order that we may reassemble. */
916   /*
917      if nonzero structure of submatrix B cannot change then we know that
918      no processor disassembled thus we can skip this stuff
919   */
920   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
921     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
922     if (mat->was_assembled && !other_disassembled) {
923       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
924     }
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
929   }
930   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
931   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
933 
934 #if defined(PETSC_USE_INFO)
935   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
936     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
948   baij->rowvalues = 0;
949   PetscFunctionReturn(0);
950 }
951 
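/*
   The assembly routines above implement the standard PETSc two-phase
   handshake.  From the caller's side the lifecycle is:

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() starts the communication of the stashed off-process
   entries; unrelated local work can be overlapped between the two calls.
   MatAssemblyEnd() drains the stashes, assembles A and B, and, when the hash
   table option is enabled (ht_flag), builds the table and switches the
   MatSetValues()/MatSetValuesBlocked() implementations to the _HT variants.
*/
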
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscBool         iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscBool  isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1061       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084 
1085   PetscFunctionBegin;
1086   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1087   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1088   nz   = bs2*(A->nz + B->nz);
1089   rlen = mat->rmap->n;
1090   if (!rank) {
1091     header[0] = MAT_FILE_CLASSID;
1092     header[1] = mat->rmap->N;
1093     header[2] = mat->cmap->N;
1094     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1095     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1096     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1097     /* get largest number of rows any processor has */
1098     range = mat->rmap->range;
1099     for (i=1; i<size; i++) {
1100       rlen = PetscMax(rlen,range[i+1] - range[i]);
1101     }
1102   } else {
1103     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1104   }
1105 
1106   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1107   /* compute the number of nonzero blocks in each block row */
1108   count = 0;
1109   for (i=0; i<a->mbs; i++) {
1110     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1111   }
1112   /* store the row lengths to the file */
1113   if (!rank) {
1114     MPI_Status status;
1115     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1116     rlen  = (range[1] - range[0])/bs;
1117     for (i=0; i<rlen; i++) {
1118       for (j=0; j<bs; j++) {
1119         row_lens[i*bs+j] = bs*crow_lens[i];
1120       }
1121     }
1122     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1123     for (i=1; i<size; i++) {
1124       rlen = (range[i+1] - range[i])/bs;
1125       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1126       for (k=0; k<rlen; k++) {
1127         for (j=0; j<bs; j++) {
1128           row_lens[k*bs+j] = bs*crow_lens[k];
1129         }
1130       }
1131       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1132     }
1133     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1134   } else {
1135     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1136   }
1137   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1138 
1139   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
1140      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
1141      the communication needed for the nonzero values */
1142   nzmax = nz; /* space the largest processor needs */
1143   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1144   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1145   cnt  = 0;
1146   for (i=0; i<a->mbs; i++) {
1147     pcnt = cnt;
1148     for (j=B->i[i]; j<B->i[i+1]; j++) {
1149       if ( (col = garray[B->j[j]]) > cstart) break;
1150       for (l=0; l<bs; l++) {
1151         column_indices[cnt++] = bs*col+l;
1152       }
1153     }
1154     for (k=A->i[i]; k<A->i[i+1]; k++) {
1155       for (l=0; l<bs; l++) {
1156         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1157       }
1158     }
1159     for (; j<B->i[i+1]; j++) {
1160       for (l=0; l<bs; l++) {
1161         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1162       }
1163     }
1164     len = cnt - pcnt;
1165     for (k=1; k<bs; k++) {
1166       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1167       cnt += len;
1168     }
1169   }
1170   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1171 
1172   /* store the columns to the file */
1173   if (!rank) {
1174     MPI_Status status;
1175     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     for (i=1; i<size; i++) {
1177       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1178       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1179       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     }
1181   } else {
1182     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1183     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1184   }
1185   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1186 
1187   /* load up the numerical values */
1188   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1189   cnt = 0;
1190   for (i=0; i<a->mbs; i++) {
1191     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1192     for (j=B->i[i]; j<B->i[i+1]; j++) {
1193       if ( garray[B->j[j]] > cstart) break;
1194       for (l=0; l<bs; l++) {
1195         for (ll=0; ll<bs; ll++) {
1196           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1197         }
1198       }
1199       cnt += bs;
1200     }
1201     for (k=A->i[i]; k<A->i[i+1]; k++) {
1202       for (l=0; l<bs; l++) {
1203         for (ll=0; ll<bs; ll++) {
1204           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1205         }
1206       }
1207       cnt += bs;
1208     }
1209     for (; j<B->i[i+1]; j++) {
1210       for (l=0; l<bs; l++) {
1211         for (ll=0; ll<bs; ll++) {
1212           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1213         }
1214       }
1215       cnt += bs;
1216     }
1217     cnt += (bs-1)*rlen;
1218   }
1219   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1220 
1221   /* store the column values to the file */
1222   if (!rank) {
1223     MPI_Status status;
1224     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1227       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1228       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1229     }
1230   } else {
1231     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1232     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_values);CHKERRQ(ierr);
1235 
1236   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1237   if (file) {
1238     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_MPIBAIJ"
1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscBool      iascii,isdraw,issocket,isbinary;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1253   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1254   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1255   if (iascii || isdraw || issocket) {
1256     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1259   } else {
1260     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
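/*
   A sketch of driving the viewers dispatched above (the binary path writes a
   file loadable with MatLoad(); the file name "mat.dat" is illustrative only):

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

     PetscViewer bv;
     ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"mat.dat",FILE_MODE_WRITE,&bv);CHKERRQ(ierr);
     ierr = MatView(mat,bv);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(bv);CHKERRQ(ierr);
*/
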
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1268 {
1269   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1270   PetscErrorCode ierr;
1271 
1272   PetscFunctionBegin;
1273 #if defined(PETSC_USE_LOG)
1274   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1275 #endif
1276   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1277   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1278   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1279   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1280 #if defined (PETSC_USE_CTABLE)
1281   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1282 #else
1283   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1284 #endif
1285   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1286   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1287   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1288   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1290   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1291   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1292   ierr = PetscFree(baij);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
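/*
   MatMult above overlaps communication with computation: the scatter of the
   needed off-process entries of xx into a->lvec is started, the product with
   the local diagonal block a->A is formed while the messages are in flight,
   and the off-diagonal contribution from a->B applied to lvec is added once
   the scatter completes.  From the caller this is simply

     ierr = MatMult(A,x,y);CHKERRQ(ierr);

   with x conforming to the column layout and y to the row layout of A.
*/
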
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* values actually were received in the Begin() but we need to call this nop */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
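/*
   Added commentary: VecScatterGetMerged() above reports whether this
   scatter completes its communication inside VecScatterBegin().  In the
   merged case the local transpose multiply must fill yy before the
   Begin() so the remote contributions are added to it, and the trailing
   VecScatterEnd() is only a formal no-op.
*/
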
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted into zz until the next line, which is true for the current */
1387   /* implementation but is perhaps not always true. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt     max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) { max = tmp; }
1447     }
1448     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap  = mat->garray;
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart)
1483               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1484             else break;
1485           }
1486           imark = i;
1487         }
1488         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1490       }
1491     } else {
1492       if (idx) *idx = 0;
1493       if (v)   *v   = 0;
1494     }
1495   }
1496   *nz = nztot;
1497   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
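/*
   Hypothetical usage sketch (the helper name is invented): MatGetRow above
   merges the diagonal (A) and off-diagonal (B) parts of a locally owned row
   into one list sorted by global column index.  A typical traversal:
*/
#if 0
static PetscErrorCode ExampleRowTraversal(Mat mat)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {       /* only locally owned rows are legal */
    ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect nz, cols[], vals[] ... */
    ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
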
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size     = (PetscReal)matin->rmap->bs;
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1542   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1543   isend[3] += info->memory;  isend[4] += info->mallocs;
1544   if (flag == MAT_LOCAL) {
1545     info->nz_used      = isend[0];
1546     info->nz_allocated = isend[1];
1547     info->nz_unneeded  = isend[2];
1548     info->memory       = isend[3];
1549     info->mallocs      = isend[4];
1550   } else if (flag == MAT_GLOBAL_MAX) {
1551     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1552     info->nz_used      = irecv[0];
1553     info->nz_allocated = irecv[1];
1554     info->nz_unneeded  = irecv[2];
1555     info->memory       = irecv[3];
1556     info->mallocs      = irecv[4];
1557   } else if (flag == MAT_GLOBAL_SUM) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else {
1565     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1566   }
1567   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1568   info->fill_ratio_needed = 0;
1569   info->factor_mallocs    = 0;
1570   PetscFunctionReturn(0);
1571 }
1572 
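/*
   Added commentary with a hypothetical sketch: the five statistics packed
   into isend[] above can be queried per process (MAT_LOCAL) or reduced
   over the communicator, e.g.
*/
#if 0
  MatInfo info;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
#endif
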
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1575 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1576 {
1577   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   switch (op) {
1582   case MAT_NEW_NONZERO_LOCATIONS:
1583   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1584   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1585   case MAT_KEEP_NONZERO_PATTERN:
1586   case MAT_NEW_NONZERO_LOCATION_ERR:
1587     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1588     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1589     break;
1590   case MAT_ROW_ORIENTED:
1591     a->roworiented = flg;
1592     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1593     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1594     break;
1595   case MAT_NEW_DIAGONALS:
1596     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1597     break;
1598   case MAT_IGNORE_OFF_PROC_ENTRIES:
1599     a->donotstash = flg;
1600     break;
1601   case MAT_USE_HASH_TABLE:
1602     a->ht_flag = flg;
1603     break;
1604   case MAT_SYMMETRIC:
1605   case MAT_STRUCTURALLY_SYMMETRIC:
1606   case MAT_HERMITIAN:
1607   case MAT_SYMMETRY_ETERNAL:
1608     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1609     break;
1610   default:
1611     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1612   }
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1618 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1619 {
1620   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1621   Mat_SeqBAIJ    *Aloc;
1622   Mat            B;
1623   PetscErrorCode ierr;
1624   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1625   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1626   MatScalar      *a;
1627 
1628   PetscFunctionBegin;
1629   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
1630   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1631     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1632     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1633     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1634     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1635   } else {
1636     B = *matout;
1637   }
1638 
1639   /* copy over the A part */
1640   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1641   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1642   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1643 
1644   for (i=0; i<mbs; i++) {
1645     rvals[0] = bs*(baij->rstartbs + i);
1646     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1647     for (j=ai[i]; j<ai[i+1]; j++) {
1648       col = (baij->cstartbs+aj[j])*bs;
1649       for (k=0; k<bs; k++) {
1650         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1651         col++; a += bs;
1652       }
1653     }
1654   }
1655   /* copy over the B part */
1656   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1657   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658   for (i=0; i<mbs; i++) {
1659     rvals[0] = bs*(baij->rstartbs + i);
1660     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1661     for (j=ai[i]; j<ai[i+1]; j++) {
1662       col = baij->garray[aj[j]]*bs;
1663       for (k=0; k<bs; k++) {
1664         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1665         col++; a += bs;
1666       }
1667     }
1668   }
1669   ierr = PetscFree(rvals);CHKERRQ(ierr);
1670   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672 
1673   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1674     *matout = B;
1675   } else {
1676     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
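/*
   Hypothetical sketch of the two calling modes handled above (A is an
   assembled MPIBAIJ matrix; in-place reuse requires it to be square):
*/
#if 0
  Mat At;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* creates At = A^T */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* replaces A by A^T in place */
#endif
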
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1683 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1684 {
1685   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1686   Mat            a = baij->A,b = baij->B;
1687   PetscErrorCode ierr;
1688   PetscInt       s1,s2,s3;
1689 
1690   PetscFunctionBegin;
1691   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1692   if (rr) {
1693     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1694     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1695     /* Overlap communication with computation. */
1696     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1697   }
1698   if (ll) {
1699     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1700     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1701     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1702   }
1703   /* scale the diagonal block */
1704   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1705 
1706   if (rr) {
1707     /* Do a scatter end and then right scale the off-diagonal block */
1708     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1709     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1710   }
1711 
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1717 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1718 {
1719   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1720   PetscErrorCode ierr;
1721   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1722   PetscInt       i,*owners = A->rmap->range;
1723   PetscInt       *nprocs,j,idx,nsends,row;
1724   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1725   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1726   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1727   MPI_Comm       comm = ((PetscObject)A)->comm;
1728   MPI_Request    *send_waits,*recv_waits;
1729   MPI_Status     recv_status,*send_status;
1730 #if defined(PETSC_DEBUG)
1731   PetscBool      found = PETSC_FALSE;
1732 #endif
1733 
1734   PetscFunctionBegin;
1735   /* first count the number of contributors to each processor */
1736   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1737   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1738   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1739   j = 0;
1740   for (i=0; i<N; i++) {
1741     if (lastidx > (idx = rows[i])) j = 0;
1742     lastidx = idx;
1743     for (; j<size; j++) {
1744       if (idx >= owners[j] && idx < owners[j+1]) {
1745         nprocs[2*j]++;
1746         nprocs[2*j+1] = 1;
1747         owner[i] = j;
1748 #if defined(PETSC_DEBUG)
1749         found = PETSC_TRUE;
1750 #endif
1751         break;
1752       }
1753     }
1754 #if defined(PETSC_DEBUG)
1755     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1756     found = PETSC_FALSE;
1757 #endif
1758   }
1759   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1760 
1761   if (A->nooffproczerorows) {
1762     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but requested zeroing an off-process row");
1763     nrecvs = nsends;
1764     nmax   = N;
1765   } else {
1766     /* inform other processors of number of messages and max length*/
1767     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1768   }
1769 
1770   /* post receives:   */
1771   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1772   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1773   for (i=0; i<nrecvs; i++) {
1774     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1775   }
1776 
1777   /* do sends:
1778      1) starts[i] gives the starting index in svalues for stuff going to
1779      the ith processor
1780   */
1781   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1782   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1783   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1784   starts[0]  = 0;
1785   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1786   for (i=0; i<N; i++) {
1787     svalues[starts[owner[i]]++] = rows[i];
1788   }
1789 
1790   starts[0] = 0;
1791   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1792   count = 0;
1793   for (i=0; i<size; i++) {
1794     if (nprocs[2*i+1]) {
1795       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1796     }
1797   }
1798   ierr = PetscFree(starts);CHKERRQ(ierr);
1799 
1800   base = owners[rank];
1801 
1802   /*  wait on receives */
1803   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1804   count  = nrecvs;
1805   slen = 0;
1806   while (count) {
1807     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1808     /* unpack receives into our local space */
1809     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1810     source[imdex]  = recv_status.MPI_SOURCE;
1811     lens[imdex]    = n;
1812     slen          += n;
1813     count--;
1814   }
1815   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1816 
1817   /* move the data into the send scatter */
1818   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1819   count = 0;
1820   for (i=0; i<nrecvs; i++) {
1821     values = rvalues + i*nmax;
1822     for (j=0; j<lens[i]; j++) {
1823       lrows[count++] = values[j] - base;
1824     }
1825   }
1826   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1827   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1828   ierr = PetscFree(owner);CHKERRQ(ierr);
1829   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1830 
1831   /* actually zap the local rows */
1832   /*
1833         Zero the required rows. If the "diagonal block" of the matrix
1834      is square and the user wishes to set the diagonal, we use separate
1835      code so that MatSetValues() is not called once per diagonal entry;
1836      that would allocate new memory each time, triggering many mallocs
1837      and slowing things down.
1838   */
1839   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1840   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1841   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1842     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1843   } else if (diag != 0.0) {
1844     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1845     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1846        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1847     for (i=0; i<slen; i++) {
1848       row  = lrows[i] + rstart_bs;
1849       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1850     }
1851     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1852     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1853   } else {
1854     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1855   }
1856 
1857   ierr = PetscFree(lrows);CHKERRQ(ierr);
1858 
1859   /* wait on sends */
1860   if (nsends) {
1861     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1862     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1863     ierr = PetscFree(send_status);CHKERRQ(ierr);
1864   }
1865   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1866   ierr = PetscFree(svalues);CHKERRQ(ierr);
1867 
1868   PetscFunctionReturn(0);
1869 }
1870 
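/*
   Hypothetical sketch (assuming this version's four-argument MatZeroRows()):
   the rendezvous above lets any process list rows it does not own; each row
   index is routed to its owner before being zeroed.
*/
#if 0
  PetscInt rows[2] = {0,5};                       /* global row indices */
  ierr = MatZeroRows(A,2,rows,2.0);CHKERRQ(ierr); /* zero the rows, put 2.0 on the diagonal */
#endif
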
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1873 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1874 {
1875   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1876   PetscErrorCode ierr;
1877 
1878   PetscFunctionBegin;
1879   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatEqual_MPIBAIJ"
1887 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1888 {
1889   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1890   Mat            a,b,c,d;
1891   PetscBool      flg;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   a = matA->A; b = matA->B;
1896   c = matB->A; d = matB->B;
1897 
1898   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1899   if (flg) {
1900     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1901   }
1902   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatCopy_MPIBAIJ"
1908 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1909 {
1910   PetscErrorCode ierr;
1911   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1912   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1913 
1914   PetscFunctionBegin;
1915   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1916   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1917     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1918   } else {
1919     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1920     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1921   }
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1927 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1938 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1939 {
1940   PetscErrorCode ierr;
1941   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1942   PetscBLASInt   bnz,one=1;
1943   Mat_SeqBAIJ    *x,*y;
1944 
1945   PetscFunctionBegin;
1946   if (str == SAME_NONZERO_PATTERN) {
1947     PetscScalar alpha = a;
1948     x = (Mat_SeqBAIJ *)xx->A->data;
1949     y = (Mat_SeqBAIJ *)yy->A->data;
1950     bnz = PetscBLASIntCast(x->nz);
1951     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1952     x = (Mat_SeqBAIJ *)xx->B->data;
1953     y = (Mat_SeqBAIJ *)yy->B->data;
1954     bnz = PetscBLASIntCast(x->nz);
1955     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1956   } else {
1957     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
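/*
   Hypothetical sketch: with SAME_NONZERO_PATTERN the update above runs as a
   single BLAS axpy over each stored-value array; any other structure flag
   falls back to the generic entry-by-entry MatAXPY_Basic() path.
*/
#if 0
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Y = Y + 2*X */
#endif
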
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1964 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1965 {
1966   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1967   PetscInt rbs,cbs;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1972   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1973   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1974   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1975   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1976   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1982 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1983 {
1984   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1985   PetscErrorCode ierr;
1986 
1987   PetscFunctionBegin;
1988   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1989   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1995 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1996 {
1997   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1998   PetscErrorCode ierr;
1999 
2000   PetscFunctionBegin;
2001   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2002   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2008 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2009 {
2010   PetscErrorCode ierr;
2011   IS             iscol_local;
2012   PetscInt       csize;
2013 
2014   PetscFunctionBegin;
2015   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2016   if (call == MAT_REUSE_MATRIX) {
2017     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2018     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2019   } else {
2020     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2021   }
2022   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2023   if (call == MAT_INITIAL_MATRIX) {
2024     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2025     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2026   }
2027   PetscFunctionReturn(0);
2028 }
2029 
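/*
   Hypothetical sketch (isrow and iscol are assumed to exist): the gathered
   column IS is cached on the new matrix, so a later call with
   MAT_REUSE_MATRIX must pass the same index sets.
*/
#if 0
  Mat sub;
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... values of A change, nonzero pattern stays the same ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = MatDestroy(sub);CHKERRQ(ierr);
#endif
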
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2032 /*
2033     Not great since it makes two copies of the submatrix: first a SeqBAIJ
2034   locally, and then the end result by concatenating the local matrices.
2035   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2036 */
2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2038 {
2039   PetscErrorCode ierr;
2040   PetscMPIInt    rank,size;
2041   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2042   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2043   Mat            *local,M,Mreuse;
2044   MatScalar      *vwork,*aa;
2045   MPI_Comm       comm = ((PetscObject)mat)->comm;
2046   Mat_SeqBAIJ    *aij;
2047 
2048 
2049   PetscFunctionBegin;
2050   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2051   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2052 
2053   if (call ==  MAT_REUSE_MATRIX) {
2054     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2055     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2056     local = &Mreuse;
2057     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2058   } else {
2059     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2060     Mreuse = *local;
2061     ierr   = PetscFree(local);CHKERRQ(ierr);
2062   }
2063 
2064   /*
2065       m - number of local rows
2066       n - number of columns (same on all processors)
2067       rstart - first row in new global matrix generated
2068   */
2069   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2070   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2071   m    = m/bs;
2072   n    = n/bs;
2073 
2074   if (call == MAT_INITIAL_MATRIX) {
2075     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2076     ii  = aij->i;
2077     jj  = aij->j;
2078 
2079     /*
2080         Determine the number of non-zeros in the diagonal and off-diagonal
2081         portions of the matrix in order to do correct preallocation
2082     */
2083 
2084     /* first get start and end of "diagonal" columns */
2085     if (csize == PETSC_DECIDE) {
2086       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2087       if (mglobal == n*bs) { /* square matrix */
2088         nlocal = m;
2089       } else {
2090         nlocal = n/size + ((n % size) > rank);
2091       }
2092     } else {
2093       nlocal = csize/bs;
2094     }
2095     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2096     rstart = rend - nlocal;
2097     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2098 
2099     /* next, compute all the lengths */
2100     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2101     olens = dlens + m;
2102     for (i=0; i<m; i++) {
2103       jend = ii[i+1] - ii[i];
2104       olen = 0;
2105       dlen = 0;
2106       for (j=0; j<jend; j++) {
2107         if (*jj < rstart || *jj >= rend) olen++;
2108         else dlen++;
2109         jj++;
2110       }
2111       olens[i] = olen;
2112       dlens[i] = dlen;
2113     }
2114     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2115     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2116     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2117     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2118     ierr = PetscFree(dlens);CHKERRQ(ierr);
2119   } else {
2120     PetscInt ml,nl;
2121 
2122     M = *newmat;
2123     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2124     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2125     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2126     /*
2127          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2128        rather than the slower MatSetValues().
2129     */
2130     M->was_assembled = PETSC_TRUE;
2131     M->assembled     = PETSC_FALSE;
2132   }
2133   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2134   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2135   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2136   ii  = aij->i;
2137   jj  = aij->j;
2138   aa  = aij->a;
2139   for (i=0; i<m; i++) {
2140     row   = rstart/bs + i;
2141     nz    = ii[i+1] - ii[i];
2142     cwork = jj;     jj += nz;
2143     vwork = aa;     aa += nz;
2144     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2145   }
2146 
2147   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   *newmat = M;
2150 
2151   /* save submatrix used in processor for next request */
2152   if (call ==  MAT_INITIAL_MATRIX) {
2153     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2154     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2155   }
2156 
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatPermute_MPIBAIJ"
2162 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2163 {
2164   MPI_Comm       comm,pcomm;
2165   PetscInt       first,local_size,nrows;
2166   const PetscInt *rows;
2167   PetscMPIInt    size;
2168   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2173   /* make a collective version of 'rowp' */
2174   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2175   if (pcomm==comm) {
2176     crowp = rowp;
2177   } else {
2178     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2179     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2180     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2181     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2182   }
2183   /* collect the global row permutation and invert it */
2184   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2185   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2186   if (pcomm!=comm) {
2187     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2188   }
2189   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2190   /* get the local target indices */
2191   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2192   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2193   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2194   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2195   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2196   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2197   /* the column permutation is so much easier;
2198      make a local version of 'colp' and invert it */
2199   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2200   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2201   if (size==1) {
2202     lcolp = colp;
2203   } else {
2204     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2205     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2206     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2207   }
2208   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2209   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2210   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2211   if (size>1) {
2212     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2213     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2214   }
2215   /* now we just get the submatrix */
2216   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2217   /* clean up */
2218   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2219   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2225 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2226 {
2227   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2228   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2229 
2230   PetscFunctionBegin;
2231   if (nghosts) { *nghosts = B->nbs;}
2232   if (ghosts) {*ghosts = baij->garray;}
2233   PetscFunctionReturn(0);
2234 }
2235 
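/*
   Hypothetical sketch: the returned array is the matrix's own garray of
   block column indices and must not be freed by the caller.
*/
#if 0
  PetscInt       nghosts;
  const PetscInt *ghosts;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr); /* off-process block columns needed locally */
#endif
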
2236 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2240 /*
2241     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2242 */
2243 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2244 {
2245   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2246   PetscErrorCode         ierr;
2247   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2248   PetscInt               bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2249   const PetscInt         *is;
2250   PetscInt               nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2251   PetscInt               *rowhit,M,cstart,cend,colb;
2252   PetscInt               *columnsforrow,l;
2253   IS                     *isa;
2254   PetscBool              done,flg;
2255   ISLocalToGlobalMapping map = mat->bmapping;
2256   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2257 
2258   PetscFunctionBegin;
2259   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2260   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2261 
2262   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2263   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2264   M                = mat->rmap->n/bs;
2265   cstart           = mat->cmap->rstart/bs;
2266   cend             = mat->cmap->rend/bs;
2267   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2268   c->N             = mat->cmap->N/bs;
2269   c->m             = mat->rmap->n/bs;
2270   c->rstart        = mat->rmap->rstart/bs;
2271 
2272   c->ncolors       = nis;
2273   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2274   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2275   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2276   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2277   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2278   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2279 
2280   /* Allow access to data structures of local part of matrix */
2281   if (!baij->colmap) {
2282     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2283   }
2284   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2285   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2286 
2287   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2288   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2289 
2290   for (i=0; i<nis; i++) {
2291     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2292     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2293     c->ncolumns[i] = n;
2294     if (n) {
2295       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2296       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2297       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2298     } else {
2299       c->columns[i]  = 0;
2300     }
2301 
2302     if (ctype == IS_COLORING_GLOBAL){
2303       /* Determine the total (parallel) number of columns of this color */
2304       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2305       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2306 
2307       nn   = PetscMPIIntCast(n);
2308       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2309       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2310       if (!nctot) {
2311         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2312       }
2313 
2314       disp[0] = 0;
2315       for (j=1; j<size; j++) {
2316         disp[j] = disp[j-1] + ncolsonproc[j-1];
2317       }
2318 
2319       /* Get complete list of columns for color on each processor */
2320       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2321       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2322       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2323     } else if (ctype == IS_COLORING_GHOSTED){
2324       /* Determine local number of columns of this color on this process, including ghost points */
2325       nctot = n;
2326       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2327       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2328     } else {
2329       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2330     }
2331 
2332     /*
2333        Mark all rows affected by these columns
2334     */
2335     /* Temporary option to allow for debugging/testing */
2336     flg  = PETSC_FALSE;
2337     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2338     if (!flg) {/*-----------------------------------------------------------------------------*/
2339       /* crude, fast version */
2340       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2341       /* loop over columns */
2342       for (j=0; j<nctot; j++) {
2343         if (ctype == IS_COLORING_GHOSTED) {
2344           col = ltog[cols[j]];
2345         } else {
2346           col  = cols[j];
2347         }
2348         if (col >= cstart && col < cend) {
2349           /* column is in diagonal block of matrix */
2350           rows = A_cj + A_ci[col-cstart];
2351           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2352         } else {
2353 #if defined (PETSC_USE_CTABLE)
2354           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2355           colb--;
2356 #else
2357           colb = baij->colmap[col] - 1;
2358 #endif
2359           if (colb == -1) {
2360             m = 0;
2361           } else {
2362             colb = colb/bs;
2363             rows = B_cj + B_ci[colb];
2364             m    = B_ci[colb+1] - B_ci[colb];
2365           }
2366         }
2367         /* loop over the rows containing this column, marking them in rowhit */
2368         for (k=0; k<m; k++) {
2369           rowhit[*rows++] = col + 1;
2370         }
2371       }
2372 
2373       /* count the number of hits */
2374       nrows = 0;
2375       for (j=0; j<M; j++) {
2376         if (rowhit[j]) nrows++;
2377       }
2378       c->nrows[i]         = nrows;
2379       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2380       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2381       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2382       nrows = 0;
2383       for (j=0; j<M; j++) {
2384         if (rowhit[j]) {
2385           c->rows[i][nrows]           = j;
2386           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2387           nrows++;
2388         }
2389       }
2390     } else {/*-------------------------------------------------------------------------------*/
2391       /* slow version, using rowhit as a linked list */
2392       PetscInt currentcol,fm,mfm;
2393       rowhit[M] = M;
2394       nrows     = 0;
2395       /* loop over columns*/
2396       for (j=0; j<nctot; j++) {
2397         if (ctype == IS_COLORING_GHOSTED) {
2398           col = ltog[cols[j]];
2399         } else {
2400           col  = cols[j];
2401         }
2402         if (col >= cstart && col < cend) {
2403           /* column is in diagonal block of matrix */
2404           rows = A_cj + A_ci[col-cstart];
2405           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2406         } else {
2407 #if defined (PETSC_USE_CTABLE)
2408           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2409           colb--;
2410 #else
2411           colb = baij->colmap[col] - 1;
2412 #endif
2413           if (colb == -1) {
2414             m = 0;
2415           } else {
2416             colb = colb/bs;
2417             rows = B_cj + B_ci[colb];
2418             m    = B_ci[colb+1] - B_ci[colb];
2419           }
2420         }
2421 
2422         /* loop over the rows containing this column, marking them in rowhit */
2423         fm    = M; /* fm points to first entry in linked list */
2424         for (k=0; k<m; k++) {
2425           currentcol = *rows++;
2426           /* is it already in the list? */
2427           do {
2428             mfm  = fm;
2429             fm   = rowhit[fm];
2430           } while (fm < currentcol);
2431           /* not in list so add it */
2432           if (fm != currentcol) {
2433             nrows++;
2434             columnsforrow[currentcol] = col;
2435             /* next three lines insert new entry into linked list */
2436             rowhit[mfm]               = currentcol;
2437             rowhit[currentcol]        = fm;
2438             fm                        = currentcol;
2439             /* fm points to present position in list since we know the columns are sorted */
2440           } else {
2441             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2442           }
2443         }
2444       }
2445       c->nrows[i]         = nrows;
2446       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2447       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2448       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2449       /* now store the linked list of rows into c->rows[i] */
2450       nrows = 0;
2451       fm    = rowhit[M];
2452       do {
2453         c->rows[i][nrows]            = fm;
2454         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2455         fm                           = rowhit[fm];
2456       } while (fm < M);
2457     } /* ---------------------------------------------------------------------------------------*/
2458     ierr = PetscFree(cols);CHKERRQ(ierr);
2459   }
2460 
2461   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2462   /*
2463        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2464   */
2465   if (ctype == IS_COLORING_GLOBAL) {
2466     PetscInt *garray;
2467     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2468     for (i=0; i<baij->B->cmap->n/bs; i++) {
2469       for (j=0; j<bs; j++) {
2470         garray[i*bs+j] = bs*baij->garray[i]+j;
2471       }
2472     }
2473     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2474     ierr = PetscFree(garray);CHKERRQ(ierr);
2475     CHKMEMQ;
2476     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2477     for (k=0; k<c->ncolors; k++) {
2478       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2479       for (l=0; l<c->nrows[k]; l++) {
2480         col = c->columnsforrow[k][l];
2481         if (col >= cstart && col < cend) {
2482           /* column is in diagonal block of matrix */
2483           colb = col - cstart;
2484         } else {
2485           /* column  is in "off-processor" part */
2486 #if defined (PETSC_USE_CTABLE)
2487           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2488           colb--;
2489 #else
2490           colb = baij->colmap[col] - 1;
2491 #endif
2492           colb = colb/bs;
2493           colb += cend - cstart;
2494         }
2495         c->vscaleforrow[k][l] = colb;
2496       }
2497     }
2498   } else if (ctype == IS_COLORING_GHOSTED) {
2499     /* Get gtol mapping */
2500     PetscInt N = mat->cmap->N, *gtol;
2501     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2502     for (i=0; i<N; i++) gtol[i] = -1;
2503     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2504 
2505     c->vscale = 0; /* will be created in MatFDColoringApply() */
2506     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2507     for (k=0; k<c->ncolors; k++) {
2508       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2509       for (l=0; l<c->nrows[k]; l++) {
2510         col = c->columnsforrow[k][l];      /* global column index */
2511         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2512       }
2513     }
2514     ierr = PetscFree(gtol);CHKERRQ(ierr);
2515   }
2516   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2517 
2518   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2519   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2520   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2521   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2522   CHKMEMQ;
2523   PetscFunctionReturn(0);
2524 }
2525 
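/*
   Hypothetical sketch (J and iscoloring are assumed to exist, e.g. from
   MatGetColoring() or a DM): the structure built above drives finite
   difference Jacobian assembly.
*/
#if 0
  MatFDColoring fdcoloring;
  ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
  /* ... set the function to difference, then apply it each Jacobian evaluation ... */
  ierr = MatFDColoringDestroy(fdcoloring);CHKERRQ(ierr);
#endif
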
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2528 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2529 {
2530   Mat            B;
2531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2532   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2533   Mat_SeqAIJ     *b;
2534   PetscErrorCode ierr;
2535   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2536   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2537   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2538 
2539   PetscFunctionBegin;
2540   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2541   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2542 
2543   /* ----------------------------------------------------------------
2544      Tell every processor the number of nonzeros per row
2545   */
2546   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2547   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2548     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2549   }
2550   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2551   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2552   displs     = recvcounts + size;
2553   for (i=0; i<size; i++) {
2554     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2555     displs[i]     = A->rmap->range[i]/bs;
2556   }
2557 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2558   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2559 #else
2560   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2561 #endif
2562   /* ---------------------------------------------------------------
2563      Create the sequential (SeqAIJ) matrix that will hold the complete nonzero structure
2564   */
2565   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2566   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2567   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2568   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2569   b = (Mat_SeqAIJ *)B->data;
2570 
2571   /*--------------------------------------------------------------------
2572     Copy my part of matrix column indices over
2573   */
2574   sendcount  = ad->nz + bd->nz;
2575   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2576   a_jsendbuf = ad->j;
2577   b_jsendbuf = bd->j;
2578   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2579   cnt        = 0;
2580   for (i=0; i<n; i++) {
2581 
2582     /* put in lower diagonal portion */
2583     m = bd->i[i+1] - bd->i[i];
2584     while (m > 0) {
2585       /* is it above diagonal (in bd (compressed) numbering) */
2586       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2587       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2588       m--;
2589     }
2590 
2591     /* put in diagonal portion */
2592     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2593       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2594     }
2595 
2596     /* put in upper diagonal portion */
2597     while (m-- > 0) {
2598       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2599     }
2600   }
2601   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2602 
2603   /*--------------------------------------------------------------------
2604     Gather all column indices to all processors
2605   */
2606   for (i=0; i<size; i++) {
2607     recvcounts[i] = 0;
2608     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2609       recvcounts[i] += lens[j];
2610     }
2611   }
2612   displs[0]  = 0;
2613   for (i=1; i<size; i++) {
2614     displs[i] = displs[i-1] + recvcounts[i-1];
2615   }
2616 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2617   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2618 #else
2619   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2620 #endif
2621   /*--------------------------------------------------------------------
2622     Assemble the matrix into usable form (note the numerical values are not yet set)
2623   */
2624   /* set the b->ilen (length of each row) values */
2625   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2626   /* set the b->i indices */
2627   b->i[0] = 0;
2628   for (i=1; i<=A->rmap->N/bs; i++) {
2629     b->i[i] = b->i[i-1] + lens[i-1];
2630   }
2631   ierr = PetscFree(lens);CHKERRQ(ierr);
2632   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2633   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2634   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2635 
2636   if (A->symmetric){
2637     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2638   } else if (A->hermitian) {
2639     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2640   } else if (A->structurally_symmetric) {
2641     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2642   }
2643   *newmat = B;
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatSOR_MPIBAIJ"
2649 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2650 {
2651   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2652   PetscErrorCode ierr;
2653   Vec            bb1 = 0;
2654 
2655   PetscFunctionBegin;
2656   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2657     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2658   }
2659 
2660   if (flag == SOR_APPLY_UPPER) {
2661     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2662     PetscFunctionReturn(0);
2663   }
2664 
2665   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2666     if (flag & SOR_ZERO_INITIAL_GUESS) {
2667       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2668       its--;
2669     }
2670 
2671     while (its--) {
2672       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2673       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2674 
2675       /* update rhs: bb1 = bb - B*x */
2676       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2677       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2678 
2679       /* local sweep */
2680       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2681     }
2682   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2683     if (flag & SOR_ZERO_INITIAL_GUESS) {
2684       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2685       its--;
2686     }
2687     while (its--) {
2688       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690 
2691       /* update rhs: bb1 = bb - B*x */
2692       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2693       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2694 
2695       /* local sweep */
2696       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2697     }
2698   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2699     if (flag & SOR_ZERO_INITIAL_GUESS) {
2700       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2701       its--;
2702     }
2703     while (its--) {
2704       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2705       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706 
2707       /* update rhs: bb1 = bb - B*x */
2708       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2709       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2710 
2711       /* local sweep */
2712       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2713     }
2714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2715 
2716   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2721 
2722 
2723 /* -------------------------------------------------------------------*/
2724 static struct _MatOps MatOps_Values = {
2725        MatSetValues_MPIBAIJ,
2726        MatGetRow_MPIBAIJ,
2727        MatRestoreRow_MPIBAIJ,
2728        MatMult_MPIBAIJ,
2729 /* 4*/ MatMultAdd_MPIBAIJ,
2730        MatMultTranspose_MPIBAIJ,
2731        MatMultTransposeAdd_MPIBAIJ,
2732        0,
2733        0,
2734        0,
2735 /*10*/ 0,
2736        0,
2737        0,
2738        MatSOR_MPIBAIJ,
2739        MatTranspose_MPIBAIJ,
2740 /*15*/ MatGetInfo_MPIBAIJ,
2741        MatEqual_MPIBAIJ,
2742        MatGetDiagonal_MPIBAIJ,
2743        MatDiagonalScale_MPIBAIJ,
2744        MatNorm_MPIBAIJ,
2745 /*20*/ MatAssemblyBegin_MPIBAIJ,
2746        MatAssemblyEnd_MPIBAIJ,
2747        MatSetOption_MPIBAIJ,
2748        MatZeroEntries_MPIBAIJ,
2749 /*24*/ MatZeroRows_MPIBAIJ,
2750        0,
2751        0,
2752        0,
2753        0,
2754 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2755        0,
2756        0,
2757        0,
2758        0,
2759 /*34*/ MatDuplicate_MPIBAIJ,
2760        0,
2761        0,
2762        0,
2763        0,
2764 /*39*/ MatAXPY_MPIBAIJ,
2765        MatGetSubMatrices_MPIBAIJ,
2766        MatIncreaseOverlap_MPIBAIJ,
2767        MatGetValues_MPIBAIJ,
2768        MatCopy_MPIBAIJ,
2769 /*44*/ 0,
2770        MatScale_MPIBAIJ,
2771        0,
2772        0,
2773        0,
2774 /*49*/ MatSetBlockSize_MPIBAIJ,
2775        0,
2776        0,
2777        0,
2778        0,
2779 /*54*/ MatFDColoringCreate_MPIBAIJ,
2780        0,
2781        MatSetUnfactored_MPIBAIJ,
2782        MatPermute_MPIBAIJ,
2783        MatSetValuesBlocked_MPIBAIJ,
2784 /*59*/ MatGetSubMatrix_MPIBAIJ,
2785        MatDestroy_MPIBAIJ,
2786        MatView_MPIBAIJ,
2787        0,
2788        0,
2789 /*64*/ 0,
2790        0,
2791        0,
2792        0,
2793        0,
2794 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2795        0,
2796        0,
2797        0,
2798        0,
2799 /*74*/ 0,
2800        MatFDColoringApply_BAIJ,
2801        0,
2802        0,
2803        0,
2804 /*79*/ 0,
2805        0,
2806        0,
2807        0,
2808        MatLoad_MPIBAIJ,
2809 /*84*/ 0,
2810        0,
2811        0,
2812        0,
2813        0,
2814 /*89*/ 0,
2815        0,
2816        0,
2817        0,
2818        0,
2819 /*94*/ 0,
2820        0,
2821        0,
2822        0,
2823        0,
2824 /*99*/ 0,
2825        0,
2826        0,
2827        0,
2828        0,
2829 /*104*/0,
2830        MatRealPart_MPIBAIJ,
2831        MatImaginaryPart_MPIBAIJ,
2832        0,
2833        0,
2834 /*109*/0,
2835        0,
2836        0,
2837        0,
2838        0,
2839 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2840        0,
2841        MatGetGhosts_MPIBAIJ,
2842        0,
2843        0,
2844 /*119*/0,
2845        0,
2846        0,
2847        0
2848 };
2849 
2850 EXTERN_C_BEGIN
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2854 {
2855   PetscFunctionBegin;
2856   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2857   *iscopy = PETSC_FALSE;
2858   PetscFunctionReturn(0);
2859 }
2860 EXTERN_C_END
2861 
2862 EXTERN_C_BEGIN
2863 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2864 EXTERN_C_END
2865 
2866 EXTERN_C_BEGIN
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2869 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2870 {
2871   PetscInt       m,rstart,cstart,cend;
2872   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2873   const PetscInt *JJ=0;
2874   PetscScalar    *values=0;
2875   PetscErrorCode ierr;
2876 
2877   PetscFunctionBegin;
2878 
2879   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2880   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2881   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2882   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2883   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2884   m      = B->rmap->n/bs;
2885   rstart = B->rmap->rstart/bs;
2886   cstart = B->cmap->rstart/bs;
2887   cend   = B->cmap->rend/bs;
2888 
2889   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2890   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2891   for (i=0; i<m; i++) {
2892     nz = ii[i+1] - ii[i];
2893     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2894     nz_max = PetscMax(nz_max,nz);
2895     JJ  = jj + ii[i];
2896     for (j=0; j<nz; j++) {
2897       if (*JJ >= cstart) break;
2898       JJ++;
2899     }
2900     d = 0;
2901     for (; j<nz; j++) {
2902       if (*JJ++ >= cend) break;
2903       d++;
2904     }
2905     d_nnz[i] = d;
2906     o_nnz[i] = nz - d;
2907   }
2908   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2909   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2910 
2911   values = (PetscScalar*)V;
2912   if (!values) {
2913     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2914     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2915   }
2916   for (i=0; i<m; i++) {
2917     PetscInt          row    = i + rstart;
2918     PetscInt          ncols  = ii[i+1] - ii[i];
2919     const PetscInt    *icols = jj + ii[i];
2920     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2921     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2922   }
2923 
2924   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2925   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2926   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2927 
2928   PetscFunctionReturn(0);
2929 }
2930 EXTERN_C_END
2931 
2932 #undef __FUNCT__
2933 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2934 /*@C
2935    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2936    (block compressed row).
2937 
2938    Collective on MPI_Comm
2939 
2940    Input Parameters:
2941 +  A - the matrix
2942 .  bs - the block size
2943 .  i - the indices into j for the start of each local block row (starts with zero)
2944 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2945 -  v - optional values in the matrix (pass PETSC_NULL to have all entries zeroed)
2946 
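   Example usage (a minimal sketch with hypothetical CSR arrays describing two
   local block rows; error checking abbreviated):

.vb
   PetscInt    bs   = 2;
   PetscInt    ii[] = {0,2,3};      /* block row 0 has 2 blocks, block row 1 has 1 */
   PetscInt    jj[] = {0,1,1};      /* sorted block column indices for each block row */
   PetscScalar vv[3*2*2];           /* bs*bs values for each block; or pass PETSC_NULL */
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);CHKERRQ(ierr);
.ve
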
2947    Level: developer
2948 
2949 .keywords: matrix, aij, compressed row, sparse, parallel
2950 
2951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2952 @*/
2953 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2954 {
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 EXTERN_C_BEGIN
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2966 {
2967   Mat_MPIBAIJ    *b;
2968   PetscErrorCode ierr;
2969   PetscInt       i, newbs = PetscAbs(bs);
2970 
2971   PetscFunctionBegin;
2972   if (bs < 0) {
2973     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2974       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2975     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2976     bs   = PetscAbs(bs);
2977   }
2978   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2979   bs = newbs;
2980 
2981 
2982   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2983   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2984   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2985   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2986   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2987 
2988   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2989   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2990   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2991   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2992 
2993   if (d_nnz) {
2994     for (i=0; i<B->rmap->n/bs; i++) {
2995       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2996     }
2997   }
2998   if (o_nnz) {
2999     for (i=0; i<B->rmap->n/bs; i++) {
3000       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3001     }
3002   }
3003 
3004   b = (Mat_MPIBAIJ*)B->data;
3005   b->bs2 = bs*bs;
3006   b->mbs = B->rmap->n/bs;
3007   b->nbs = B->cmap->n/bs;
3008   b->Mbs = B->rmap->N/bs;
3009   b->Nbs = B->cmap->N/bs;
3010 
3011   for (i=0; i<=b->size; i++) {
3012     b->rangebs[i] = B->rmap->range[i]/bs;
3013   }
3014   b->rstartbs = B->rmap->rstart/bs;
3015   b->rendbs   = B->rmap->rend/bs;
3016   b->cstartbs = B->cmap->rstart/bs;
3017   b->cendbs   = B->cmap->rend/bs;
3018 
3019   if (!B->preallocated) {
3020     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3021     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3022     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3023     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3024     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3025     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3026     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3027     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3028     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3029   }
3030 
3031   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3032   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3033   B->preallocated = PETSC_TRUE;
3034   PetscFunctionReturn(0);
3035 }
3036 EXTERN_C_END
3037 
3038 EXTERN_C_BEGIN
3039 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3040 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3041 EXTERN_C_END
3042 
3043 
3044 EXTERN_C_BEGIN
3045 #undef __FUNCT__
3046 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
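/*
   Builds a MATMPIADJ adjacency graph over the block rows: the ii/jj arrays
   constructed below merge the diagonal (A) and off-diagonal (B) block
   structure, translate B's local block columns to global ones through garray,
   and drop diagonal entries since the adjacency format stores no self edges.
*/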
3047 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3048 {
3049   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3050   PetscErrorCode ierr;
3051   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3052   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3053   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3054 
3055   PetscFunctionBegin;
3056   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3057   ii[0] = 0;
3058   CHKMEMQ;
3059   for (i=0; i<M; i++) {
3060     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3061     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3062     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3063     /* remove one from the count if this row of the matrix has a diagonal entry */
3064     for (j=id[i]; j<id[i+1]; j++) {
3065       if (jd[j] == i) {ii[i+1]--;break;}
3066     }
3067     CHKMEMQ;
3068   }
3069   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3070   cnt = 0;
3071   for (i=0; i<M; i++) {
3072     for (j=io[i]; j<io[i+1]; j++) {
3073       if (garray[jo[j]] > rstart) break;
3074       jj[cnt++] = garray[jo[j]];
3075       CHKMEMQ;
3076     }
3077     for (k=id[i]; k<id[i+1]; k++) {
3078       if (jd[k] != i) {
3079         jj[cnt++] = rstart + jd[k];
3080         CHKMEMQ;
3081       }
3082     }
3083     for (;j<io[i+1]; j++) {
3084       jj[cnt++] = garray[jo[j]];
3085       CHKMEMQ;
3086     }
3087   }
3088   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 EXTERN_C_END
3092 
3093 EXTERN_C_BEGIN
3094 #if defined(PETSC_HAVE_MUMPS)
3095 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3096 #endif
3097 EXTERN_C_END
3098 
3099 /*MC
3100    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3101 
3102    Options Database Keys:
3103 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3104 . -mat_block_size <bs> - set the blocksize used to store the matrix
3105 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory (fact sets the table size factor)
3106 
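   Example usage (a minimal sketch; comm, m, n, M, N, bs, d_nz and o_nz are placeholders):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);  /* or -mat_type mpibaij with MatSetFromOptions() */
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
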
3107   Level: beginner
3108 
3109 .seealso: MatCreateMPIBAIJ()
3110 M*/
3111 
3112 EXTERN_C_BEGIN
3113 #undef __FUNCT__
3114 #define __FUNCT__ "MatCreate_MPIBAIJ"
3115 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
3116 {
3117   Mat_MPIBAIJ    *b;
3118   PetscErrorCode ierr;
3119   PetscBool      flg;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3123   B->data = (void*)b;
3124 
3125 
3126   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3127   B->mapping    = 0;
3128   B->assembled  = PETSC_FALSE;
3129 
3130   B->insertmode = NOT_SET_VALUES;
3131   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3132   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3133 
3134   /* build local table of row and column ownerships */
3135   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3136 
3137   /* build cache for off array entries formed */
3138   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3139   b->donotstash  = PETSC_FALSE;
3140   b->colmap      = PETSC_NULL;
3141   b->garray      = PETSC_NULL;
3142   b->roworiented = PETSC_TRUE;
3143 
3144   /* stuff used in block assembly */
3145   b->barray       = 0;
3146 
3147   /* stuff used for matrix vector multiply */
3148   b->lvec         = 0;
3149   b->Mvctx        = 0;
3150 
3151   /* stuff for MatGetRow() */
3152   b->rowindices   = 0;
3153   b->rowvalues    = 0;
3154   b->getrowactive = PETSC_FALSE;
3155 
3156   /* hash table stuff */
3157   b->ht           = 0;
3158   b->hd           = 0;
3159   b->ht_size      = 0;
3160   b->ht_flag      = PETSC_FALSE;
3161   b->ht_fact      = 0;
3162   b->ht_total_ct  = 0;
3163   b->ht_insert_ct = 0;
3164 
3165   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3166     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3167     if (flg) {
3168       PetscReal fact = 1.39;
3169       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3170       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3171       if (fact <= 1.0) fact = 1.39;
3172       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3173       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3174     }
3175   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3176 
3177 #if defined(PETSC_HAVE_MUMPS)
3178   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3179 #endif
3180   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3181                                      "MatConvert_MPIBAIJ_MPIAdj",
3182                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3183   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3184                                      "MatStoreValues_MPIBAIJ",
3185                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3186   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3187                                      "MatRetrieveValues_MPIBAIJ",
3188                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3189   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3190                                      "MatGetDiagonalBlock_MPIBAIJ",
3191                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3192   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3193                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3194                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3195   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3196                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3197                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3198   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3199                                      "MatDiagonalScaleLocal_MPIBAIJ",
3200                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3201   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3202                                      "MatSetHashTableFactor_MPIBAIJ",
3203                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3204   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3205   PetscFunctionReturn(0);
3206 }
3207 EXTERN_C_END
3208 
3209 /*MC
3210    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3211 
3212    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3213    and MATMPIBAIJ otherwise.
3214 
3215    Options Database Keys:
3216 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3217 
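   Example usage (a minimal sketch; the concrete type is chosen by the communicator size):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);  /* MATSEQBAIJ on one process, MATMPIBAIJ otherwise */
.ve
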
3218   Level: beginner
3219 
3220 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3221 M*/
3222 
3223 EXTERN_C_BEGIN
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatCreate_BAIJ"
3226 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3227 {
3228   PetscErrorCode ierr;
3229   PetscMPIInt    size;
3230 
3231   PetscFunctionBegin;
3232   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3233   if (size == 1) {
3234     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3235   } else {
3236     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3237   }
3238   PetscFunctionReturn(0);
3239 }
3240 EXTERN_C_END
3241 
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3244 /*@C
3245    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3246    (block compressed row).  For good matrix assembly performance
3247    the user should preallocate the matrix storage by setting the parameters
3248    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3249    performance can be increased by more than a factor of 50.
3250 
3251    Collective on Mat
3252 
3253    Input Parameters:
3254 +  A - the matrix
3255 .  bs   - size of block
3256 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3257            submatrix  (same for all local rows)
3258 .  d_nnz - array containing the number of block nonzeros in the various block rows
3259            in the diagonal portion of the local submatrix (possibly different for each block
3260            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3261 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3262            submatrix (same for all local rows).
3263 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3264            off-diagonal portion of the local submatrix (possibly different for
3265            each block row) or PETSC_NULL.
3266 
3267    If the *_nnz parameter is given then the *_nz parameter is ignored
3268 
3269    Options Database Keys:
3270 +   -mat_block_size - size of the blocks to use
3271 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory (fact sets the table size factor)
3272 
3273    Notes:
3274    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3275    then it must be used on all processors that share the object for that argument.
3276 
3277    Storage Information:
3278    For a square global matrix we define each processor's diagonal portion
3279    to be its local rows and the corresponding columns (a square submatrix);
3280    each processor's off-diagonal portion encompasses the remainder of the
3281    local matrix (a rectangular submatrix).
3282 
3283    The user can specify preallocated storage for the diagonal part of
3284    the local submatrix with either d_nz or d_nnz (not both).  Set
3285    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3286    memory allocation.  Likewise, specify preallocated storage for the
3287    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3288 
3289    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3290    the figure below we depict these three local rows and all columns (0-11).
3291 
3292 .vb
3293            0 1 2 3 4 5 6 7 8 9 10 11
3294           -------------------
3295    row 3  |  o o o d d d o o o o o o
3296    row 4  |  o o o d d d o o o o o o
3297    row 5  |  o o o d d d o o o o o o
3298           -------------------
3299 .ve
3300 
3301    Thus, any entries in the d locations are stored in the d (diagonal)
3302    submatrix, and any entries in the o locations are stored in the
3303    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3304    stored simply in the MATSEQBAIJ format for compressed row storage.
3305 
3306    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3307    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3308    In general, for PDE problems in which most nonzeros are near the diagonal,
3309    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3310    or you will get TERRIBLE performance; see the users' manual chapter on
3311    matrices.
3312 
3313    You can call MatGetInfo() to get information on how effective the preallocation was;
3314    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3315    You can also run with the option -info and look for messages with the string
3316    malloc in them to see if additional memory allocation was needed.
3317 
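   Example usage (a minimal sketch for the layout depicted above with bs=3, so
   rows 3-5 form one local block row with one diagonal block and three
   off-diagonal blocks):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,3,3,12,12);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,3,1,PETSC_NULL,3,PETSC_NULL);CHKERRQ(ierr);
.ve
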
3318    Level: intermediate
3319 
3320 .keywords: matrix, block, aij, compressed row, sparse, parallel
3321 
3322 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3323 @*/
3324 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3325 {
3326   PetscErrorCode ierr;
3327 
3328   PetscFunctionBegin;
3329   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3330   PetscFunctionReturn(0);
3331 }
3332 
3333 #undef __FUNCT__
3334 #define __FUNCT__ "MatCreateMPIBAIJ"
3335 /*@C
3336    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3337    (block compressed row).  For good matrix assembly performance
3338    the user should preallocate the matrix storage by setting the parameters
3339    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3340    performance can be increased by more than a factor of 50.
3341 
3342    Collective on MPI_Comm
3343 
3344    Input Parameters:
3345 +  comm - MPI communicator
3346 .  bs   - size of block
3347 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3348            This value should be the same as the local size used in creating the
3349            y vector for the matrix-vector product y = Ax.
3350 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3351            This value should be the same as the local size used in creating the
3352            x vector for the matrix-vector product y = Ax.
3353 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3354 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3355 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3356            submatrix  (same for all local rows)
3357 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3358            in the diagonal portion of the local submatrix (possibly different for each block
3359            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3360 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3361            submatrix (same for all local rows).
3362 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3363            off-diagonal portion of the local submatrix (possibly different for
3364            each block row) or PETSC_NULL.
3365 
3366    Output Parameter:
3367 .  A - the matrix
3368 
3369    Options Database Keys:
3370 +   -mat_block_size - size of the blocks to use
3371 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory (fact sets the table size factor)
3372 
3373    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3374    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3375    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3376 
3377    Notes:
3378    If the *_nnz parameter is given then the *_nz parameter is ignored
3379 
3380    A nonzero block is any block that has 1 or more nonzeros in it
3381 
3382    The user MUST specify either the local or global matrix dimensions
3383    (possibly both).
3384 
3385    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3386    then it must be used on all processors that share the object for that argument.
3387 
3388    Storage Information:
3389    For a square global matrix we define each processor's diagonal portion
3390    to be its local rows and the corresponding columns (a square submatrix);
3391    each processor's off-diagonal portion encompasses the remainder of the
3392    local matrix (a rectangular submatrix).
3393 
3394    The user can specify preallocated storage for the diagonal part of
3395    the local submatrix with either d_nz or d_nnz (not both).  Set
3396    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3397    memory allocation.  Likewise, specify preallocated storage for the
3398    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3399 
3400    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3401    the figure below we depict these three local rows and all columns (0-11).
3402 
3403 .vb
3404            0 1 2 3 4 5 6 7 8 9 10 11
3405           -------------------
3406    row 3  |  o o o d d d o o o o o o
3407    row 4  |  o o o d d d o o o o o o
3408    row 5  |  o o o d d d o o o o o o
3409           -------------------
3410 .ve
3411 
3412    Thus, any entries in the d locations are stored in the d (diagonal)
3413    submatrix, and any entries in the o locations are stored in the
3414    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3415    stored simply in the MATSEQBAIJ format for compressed row storage.
3416 
3417    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3418    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3419    In general, for PDE problems in which most nonzeros are near the diagonal,
3420    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3421    or you will get TERRIBLE performance; see the users' manual chapter on
3422    matrices.
3423 
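   Example usage (a minimal sketch; bs, m, n, d_nz and o_nz are placeholders):

.vb
   ierr = MatCreateMPIBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                           d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
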
3424    Level: intermediate
3425 
3426 .keywords: matrix, block, aij, compressed row, sparse, parallel
3427 
3428 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3429 @*/
3430 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3431 {
3432   PetscErrorCode ierr;
3433   PetscMPIInt    size;
3434 
3435   PetscFunctionBegin;
3436   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3437   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3438   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3439   if (size > 1) {
3440     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3441     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3442   } else {
3443     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3444     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3445   }
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 #undef __FUNCT__
3450 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3451 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3452 {
3453   Mat            mat;
3454   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3455   PetscErrorCode ierr;
3456   PetscInt       len=0;
3457 
3458   PetscFunctionBegin;
3459   *newmat       = 0;
3460   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3461   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3462   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3463   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3464 
3465   mat->factortype   = matin->factortype;
3466   mat->preallocated = PETSC_TRUE;
3467   mat->assembled    = PETSC_TRUE;
3468   mat->insertmode   = NOT_SET_VALUES;
3469 
3470   a      = (Mat_MPIBAIJ*)mat->data;
3471   mat->rmap->bs  = matin->rmap->bs;
3472   a->bs2   = oldmat->bs2;
3473   a->mbs   = oldmat->mbs;
3474   a->nbs   = oldmat->nbs;
3475   a->Mbs   = oldmat->Mbs;
3476   a->Nbs   = oldmat->Nbs;
3477 
3478   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3479   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3480 
3481   a->size         = oldmat->size;
3482   a->rank         = oldmat->rank;
3483   a->donotstash   = oldmat->donotstash;
3484   a->roworiented  = oldmat->roworiented;
3485   a->rowindices   = 0;
3486   a->rowvalues    = 0;
3487   a->getrowactive = PETSC_FALSE;
3488   a->barray       = 0;
3489   a->rstartbs     = oldmat->rstartbs;
3490   a->rendbs       = oldmat->rendbs;
3491   a->cstartbs     = oldmat->cstartbs;
3492   a->cendbs       = oldmat->cendbs;
3493 
3494   /* hash table stuff */
3495   a->ht           = 0;
3496   a->hd           = 0;
3497   a->ht_size      = 0;
3498   a->ht_flag      = oldmat->ht_flag;
3499   a->ht_fact      = oldmat->ht_fact;
3500   a->ht_total_ct  = 0;
3501   a->ht_insert_ct = 0;
3502 
3503   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3504   if (oldmat->colmap) {
3505 #if defined (PETSC_USE_CTABLE)
3506   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3507 #else
3508   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3509   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3510   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3511 #endif
3512   } else a->colmap = 0;
3513 
3514   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3515     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3516     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3517     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3518   } else a->garray = 0;
3519 
3520   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3521   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3522   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3523   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3524   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3525 
3526   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3527   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3528   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3529   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3530   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3531   *newmat = mat;
3532 
3533   PetscFunctionReturn(0);
3534 }
3535 
3536 #undef __FUNCT__
3537 #define __FUNCT__ "MatLoad_MPIBAIJ"
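/*
   Loads a matrix from a PETSc binary viewer: a four entry header
   (MAT_FILE_CLASSID, M, N, total number of nonzeros) is followed by the row
   lengths, the column indices, and finally the numerical values.  Process 0
   reads the file and ships each process its portion; if M is not divisible by
   the blocksize, extra rows holding a single unit diagonal entry are appended
   to pad the matrix.
*/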
3538 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3539 {
3540   PetscErrorCode ierr;
3541   int            fd;
3542   PetscInt       i,nz,j,rstart,rend;
3543   PetscScalar    *vals,*buf;
3544   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3545   MPI_Status     status;
3546   PetscMPIInt    rank,size,maxnz;
3547   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3548   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3549   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3550   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3551   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3552   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3553 
3554   PetscFunctionBegin;
3555   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3556     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3557   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3558 
3559   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3560   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3561   if (!rank) {
3562     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3563     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3564     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3565   }
3566 
3567   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3568 
3569   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3570   M = header[1]; N = header[2];
3571 
3572   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3573   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3574   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3575 
3576   /* If global sizes are set, check if they are consistent with that given in the file */
3577   if (sizesset) {
3578     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3579   }
3580   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3581   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3582 
3583   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3584 
3585   /*
3586      This code adds extra rows to make sure the number of rows is
3587      divisible by the blocksize
3588   */
3589   Mbs        = M/bs;
3590   extra_rows = bs - M + bs*Mbs;
3591   if (extra_rows == bs) extra_rows = 0;
3592   else                  Mbs++;
3593   if (extra_rows && !rank) {
3594     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3595   }
3596 
3597   /* determine ownership of all rows */
3598   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3599     mbs        = Mbs/size + ((Mbs % size) > rank);
3600     m          = mbs*bs;
3601   } else { /* User set */
3602     m          = newmat->rmap->n;
3603     mbs        = m/bs;
3604   }
3605   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3606   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3607 
3608   /* process 0 needs enough room for process with most rows */
3609   if (!rank) {
3610     mmax = rowners[1];
3611     for (i=2; i<=size; i++) { /* rowners[1..size] hold each process's block row count at this point */
3612       mmax = PetscMax(mmax,rowners[i]);
3613     }
3614     mmax*=bs;
3615   } else mmax = m;
3616 
3617   rowners[0] = 0;
3618   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3619   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3620   rstart = rowners[rank];
3621   rend   = rowners[rank+1];
3622 
3623   /* distribute row lengths to all processors */
3624   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3625   if (!rank) {
3626     mend = m;
3627     if (size == 1) mend = mend - extra_rows;
3628     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3629     for (j=mend; j<m; j++) locrowlens[j] = 1;
3630     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3631     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3632     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3633     for (j=0; j<m; j++) {
3634       procsnz[0] += locrowlens[j];
3635     }
3636     for (i=1; i<size; i++) {
3637       mend = browners[i+1] - browners[i];
3638       if (i == size-1) mend = mend - extra_rows;
3639       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3640       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3641       /* calculate the number of nonzeros on each processor */
3642       for (j=0; j<browners[i+1]-browners[i]; j++) {
3643         procsnz[i] += rowlengths[j];
3644       }
3645       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3646     }
3647     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3648   } else {
3649     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3650   }
3651 
3652   if (!rank) {
3653     /* determine max buffer needed and allocate it */
3654     maxnz = procsnz[0];
3655     for (i=1; i<size; i++) {
3656       maxnz = PetscMax(maxnz,procsnz[i]);
3657     }
3658     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3659 
3660     /* read in my part of the matrix column indices  */
3661     nz     = procsnz[0];
3662     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3663     mycols = ibuf;
3664     if (size == 1)  nz -= extra_rows;
3665     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3666     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3667 
3668     /* read in everyone else's column indices (except the last process's) and ship them off */
3669     for (i=1; i<size-1; i++) {
3670       nz   = procsnz[i];
3671       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3672       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3673     }
3674     /* read in the stuff for the last proc */
3675     if (size != 1) {
3676       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3677       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3678       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3679       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3680     }
3681     ierr = PetscFree(cols);CHKERRQ(ierr);
3682   } else {
3683     /* determine buffer space needed for message */
3684     nz = 0;
3685     for (i=0; i<m; i++) {
3686       nz += locrowlens[i];
3687     }
3688     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3689     mycols = ibuf;
3690     /* receive message of column indices */
3691     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3692     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3693     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3694   }
3695 
3696   /* loop over local rows, determining number of off diagonal entries */
3697   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3698   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3699   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3700   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3701   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3702   rowcount = 0; nzcount = 0;
3703   for (i=0; i<mbs; i++) {
3704     dcount  = 0;
3705     odcount = 0;
3706     for (j=0; j<bs; j++) {
3707       kmax = locrowlens[rowcount];
3708       for (k=0; k<kmax; k++) {
3709         tmp = mycols[nzcount++]/bs;
3710         if (!mask[tmp]) {
3711           mask[tmp] = 1;
3712           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3713           else masked1[dcount++] = tmp;
3714         }
3715       }
3716       rowcount++;
3717     }
3718 
3719     dlens[i]  = dcount;
3720     odlens[i] = odcount;
3721 
3722     /* zero out the mask elements we set */
3723     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3724     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3725   }
3726 
3727 
3728   if (!sizesset) {
3729     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3730   }
3731   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3732 
3733   if (!rank) {
3734     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3735     /* read in my part of the matrix numerical values  */
3736     nz = procsnz[0];
3737     vals = buf;
3738     mycols = ibuf;
3739     if (size == 1)  nz -= extra_rows;
3740     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3741     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3742 
3743     /* insert into matrix */
3744     jj      = rstart*bs;
3745     for (i=0; i<m; i++) {
3746       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3747       mycols += locrowlens[i];
3748       vals   += locrowlens[i];
3749       jj++;
3750     }
3751     /* read in other processors (except the last one) and ship out */
3752     for (i=1; i<size-1; i++) {
3753       nz   = procsnz[i];
3754       vals = buf;
3755       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3756       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3757     }
3758     /* the last proc */
3759     if (size != 1){
3760       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3761       vals = buf;
3762       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3763       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3764       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3765     }
3766     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3767   } else {
3768     /* receive numeric values */
3769     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3770 
3771     /* receive message of values */
3772     vals   = buf;
3773     mycols = ibuf;
3774     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3775     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3776     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3777 
3778     /* insert into matrix */
3779     jj      = rstart*bs;
3780     for (i=0; i<m; i++) {
3781       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3782       mycols += locrowlens[i];
3783       vals   += locrowlens[i];
3784       jj++;
3785     }
3786   }
3787   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3788   ierr = PetscFree(buf);CHKERRQ(ierr);
3789   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3790   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3791   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3792   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3793   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3794   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3795 
3796   PetscFunctionReturn(0);
3797 }
3798 
3799 #undef __FUNCT__
3800 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3801 /*@
3802    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3803 
3804    Input Parameters:
3805 .  mat  - the matrix
3806 .  fact - factor
3807 
3808    Not Collective, each process can use a different factor
3809 
3810    Level: advanced
3811 
3812   Notes:
3813    This can also be set by the command line option: -mat_use_hash_table <fact>
3814 
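   Example usage (a minimal sketch; 1.6 is just an illustrative factor):

.vb
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
.ve
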
3815 .keywords: matrix, hashtable, factor, HT
3816 
3817 .seealso: MatSetOption()
3818 @*/
3819 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3820 {
3821   PetscErrorCode ierr;
3822 
3823   PetscFunctionBegin;
3824   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3825   PetscFunctionReturn(0);
3826 }
3827 
3828 EXTERN_C_BEGIN
3829 #undef __FUNCT__
3830 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3831 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3832 {
3833   Mat_MPIBAIJ *baij;
3834 
3835   PetscFunctionBegin;
3836   baij = (Mat_MPIBAIJ*)mat->data;
3837   baij->ht_fact = fact;
3838   PetscFunctionReturn(0);
3839 }
3840 EXTERN_C_END
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3844 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3845 {
3846   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3847   PetscFunctionBegin;
3848   *Ad     = a->A;
3849   *Ao     = a->B;
3850   *colmap = a->garray;
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 /*
3855     Special version for direct calls from Fortran (to eliminate two function call overheads).
3856 */
3857 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3858 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3859 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3860 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3861 #endif
3862 
3863 #undef __FUNCT__
3864 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3865 /*@C
3866   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3867 
3868   Collective on Mat
3869 
3870   Input Parameters:
3871 + mat - the matrix
3872 . min - number of input rows
3873 . im - input rows
3874 . nin - number of input columns
3875 . in - input columns
3876 . v - numerical values input
3877 - addvin - INSERT_VALUES or ADD_VALUES
3878 
3879   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3880 
3881   Level: advanced
3882 
3883 .seealso:   MatSetValuesBlocked()
3884 @*/
3885 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3886 {
3887   /* convert input arguments to C version */
3888   Mat             mat = *matin;
3889   PetscInt        m = *min, n = *nin;
3890   InsertMode      addv = *addvin;
3891 
3892   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3893   const MatScalar *value;
3894   MatScalar       *barray=baij->barray;
3895   PetscBool       roworiented = baij->roworiented;
3896   PetscErrorCode  ierr;
3897   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3898   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3899   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3900 
3901   PetscFunctionBegin;
3902   /* tasks normally handled by MatSetValuesBlocked() */
3903   if (mat->insertmode == NOT_SET_VALUES) {
3904     mat->insertmode = addv;
3905   }
3906 #if defined(PETSC_USE_DEBUG)
3907   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3908   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3909 #endif
3910   if (mat->assembled) {
3911     mat->was_assembled = PETSC_TRUE;
3912     mat->assembled     = PETSC_FALSE;
3913   }
3914   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3915 
3916 
3917   if (!barray) {
3918     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3919     baij->barray = barray;
3920   }
3921 
3922   if (roworiented) {
3923     stepval = (n-1)*bs;
3924   } else {
3925     stepval = (m-1)*bs;
3926   }
3927   for (i=0; i<m; i++) {
3928     if (im[i] < 0) continue;
3929 #if defined(PETSC_USE_DEBUG)
3930     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3931 #endif
3932     if (im[i] >= rstart && im[i] < rend) {
3933       row = im[i] - rstart;
3934       for (j=0; j<n; j++) {
3935         /* If n == 1 (row oriented) or m == 1 (column oriented) the input block can be used in place */
3936         if ((roworiented) && (n == 1)) {
3937           barray = (MatScalar*)v + i*bs2;
3938         } else if((!roworiented) && (m == 1)) {
3939           barray = (MatScalar*)v + j*bs2;
3940         } else { /* Here a copy is required */
3941           if (roworiented) {
3942             value = v + i*(stepval+bs)*bs + j*bs;
3943           } else {
3944             value = v + j*(stepval+bs)*bs + i*bs;
3945           }
3946           for (ii=0; ii<bs; ii++,value+=stepval) {
3947             for (jj=0; jj<bs; jj++) {
3948               *barray++  = *value++;
3949             }
3950           }
3951           barray -=bs2;
3952         }
3953 
3954         if (in[j] >= cstart && in[j] < cend){
3955           col  = in[j] - cstart;
3956           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3957         }
3958         else if (in[j] < 0) continue;
3959 #if defined(PETSC_USE_DEBUG)
3960         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3961 #endif
3962         else {
3963           if (mat->was_assembled) {
3964             if (!baij->colmap) {
3965               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3966             }
3967 
3968 #if defined(PETSC_USE_DEBUG)
3969 #if defined (PETSC_USE_CTABLE)
3970             { PetscInt data;
3971               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3972               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3973             }
3974 #else
3975             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3976 #endif
3977 #endif
3978 #if defined (PETSC_USE_CTABLE)
3979             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3980             col  = (col - 1)/bs;
3981 #else
3982             col = (baij->colmap[in[j]] - 1)/bs;
3983 #endif
3984             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3985               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3986               col =  in[j];
3987             }
3988           }
3989           else col = in[j];
3990           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3991         }
3992       }
3993     } else {
3994       if (!baij->donotstash) {
3995         if (roworiented) {
3996           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3997         } else {
3998           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3999         }
4000       }
4001     }
4002   }
4003 
4004   /* task normally handled by MatSetValuesBlocked() */
4005   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4006   PetscFunctionReturn(0);
4007 }
4008 
4009 #undef __FUNCT__
4010 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4011 /*@
4012      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4013          CSR format the local rows.
4014 
4015    Collective on MPI_Comm
4016 
4017    Input Parameters:
4018 +  comm - MPI communicator
4019 .  bs - the block size, only a block size of 1 is supported
4020 .  m - number of local rows (Cannot be PETSC_DECIDE)
4021 .  n - This value should be the same as the local size used in creating the
4022        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4023        calculated if N is given) For square matrices n is almost always m.
4024 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4025 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4026 .   i - row indices
4027 .   j - column indices
4028 -   a - matrix values
4029 
4030    Output Parameter:
4031 .   mat - the matrix
4032 
4033    Level: intermediate
4034 
4035    Notes:
4036        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4037      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4038      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4039 
4040        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4041 
4042 .keywords: matrix, aij, compressed row, sparse, parallel
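   Example usage (a minimal sketch for two local rows with bs=1; the values are
   illustrative only):

.vb
   PetscInt    i[] = {0,2,3};          /* row 0 has 2 entries, row 1 has 1 */
   PetscInt    j[] = {0,1,1};          /* 0-based column indices */
   PetscScalar a[] = {2.0,-1.0,2.0};
   ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve
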
4043 
4044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4045           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4046 @*/
4047 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4048 {
4049   PetscErrorCode ierr;
4050 
4051 
4052   PetscFunctionBegin;
4053   if (i[0]) {
4054     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4055   }
4056   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4057   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4058   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4059   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4060   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4061   PetscFunctionReturn(0);
4062 }
4063