xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2b40b63f2d83c74feb0a2167f386cf143c8f6e2d)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
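/*
   Worked example of the off-diagonal index recovery above (illustration
   only, not part of the original source): with block size bs = 2 and
   garray = {3,7}, a local off-diagonal column idxb[i] = 3 lies in local
   block column 3/2 = 1, whose global block column is garray[1] = 7, so the
   recovered global column index is 2*7 + 3%2 = 15.
*/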
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
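/*
   Usage sketch for the store/retrieve pair above (not part of the original
   source; error checking abbreviated).  MatStoreValues() requires that the
   nonzero pattern be frozen first:

       ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       (... modify the numerical values, e.g. while testing a shifted factorization ...)
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/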
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non-scalable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
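/*
   Worked example of the colmap built above (illustration only, not part of
   the original source): with bs = 2 and garray = {3,7}, global block
   column 3 maps to 0*2+1 = 1 and global block column 7 maps to 1*2+1 = 3;
   the stored value is the local column offset plus one, so that a lookup
   returning 0 means "block column not present in the off-diagonal part".
*/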
109 
110 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
111 { \
112  \
113     brow = row/bs;  \
114     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
115     rmax = aimax[brow]; nrow = ailen[brow]; \
116       bcol = col/bs; \
117       ridx = row % bs; cidx = col % bs; \
118       low = 0; high = nrow; \
119       while (high-low > 3) { \
120         t = (low+high)/2; \
121         if (rp[t] > bcol) high = t; \
122         else              low  = t; \
123       } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > bcol) break; \
126         if (rp[_i] == bcol) { \
127           bap  = ap +  bs2*_i + bs*cidx + ridx; \
128           if (addv == ADD_VALUES) *bap += value;  \
129           else                    *bap  = value;  \
130           goto a_noinsert; \
131         } \
132       } \
133       if (a->nonew == 1) goto a_noinsert; \
134       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
136       N = nrow++ - 1;  \
137       /* shift up all the later entries in this row */ \
138       for (ii=N; ii>=_i; ii--) { \
139         rp[ii+1] = rp[ii]; \
140         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141       } \
142       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
143       rp[_i]                      = bcol;  \
144       ap[bs2*_i + bs*cidx + ridx] = value;  \
145       a_noinsert:; \
146     ailen[brow] = nrow; \
147 }
148 
149 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
150 { \
151     brow = row/bs;  \
152     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
153     rmax = bimax[brow]; nrow = bilen[brow]; \
154       bcol = col/bs; \
155       ridx = row % bs; cidx = col % bs; \
156       low = 0; high = nrow; \
157       while (high-low > 3) { \
158         t = (low+high)/2; \
159         if (rp[t] > bcol) high = t; \
160         else              low  = t; \
161       } \
162       for (_i=low; _i<high; _i++) { \
163         if (rp[_i] > bcol) break; \
164         if (rp[_i] == bcol) { \
165           bap  = ap +  bs2*_i + bs*cidx + ridx; \
166           if (addv == ADD_VALUES) *bap += value;  \
167           else                    *bap  = value;  \
168           goto b_noinsert; \
169         } \
170       } \
171       if (b->nonew == 1) goto b_noinsert; \
172       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
173       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
174       CHKMEMQ;\
175       N = nrow++ - 1;  \
176       /* shift up all the later entries in this row */ \
177       for (ii=N; ii>=_i; ii--) { \
178         rp[ii+1] = rp[ii]; \
179         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
180       } \
181       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
182       rp[_i]                      = bcol;  \
183       ap[bs2*_i + bs*cidx + ridx] = value;  \
184       b_noinsert:; \
185     bilen[brow] = nrow; \
186 }
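/*
   Illustration of the addressing used by the two macros above (not part of
   the original source): each bs x bs block is stored column-major, so entry
   (ridx,cidx) of block _i of a block row lives at offset
   bs2*_i + bs*cidx + ridx from the start of that block row's values.  For
   bs = 3 (bs2 = 9), the (ridx=2,cidx=1) entry of the second block (_i = 1)
   sits at offset 9*1 + 3*1 + 2 = 14.
*/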
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatSetValues_MPIBAIJ"
190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
191 {
192   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
193   MatScalar      value;
194   PetscBool      roworiented = baij->roworiented;
195   PetscErrorCode ierr;
196   PetscInt       i,j,row,col;
197   PetscInt       rstart_orig=mat->rmap->rstart;
198   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
199   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
200 
201   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros above */
202   Mat            A = baij->A;
203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
204   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   MatScalar      *aa=a->a;
206 
207   Mat            B = baij->B;
208   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
209   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   MatScalar      *ba=b->a;
211 
212   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
214   MatScalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   if (v) PetscValidScalarPointer(v,6);
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
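/*
   Usage sketch (not part of the original source; error checking
   abbreviated; assumes an already created MATMPIBAIJ matrix mat, with
   row/col being global point indices chosen for illustration): the routine
   above is reached through the public MatSetValues() interface; entries
   whose rows are owned by another process are stashed and communicated
   during assembly.

       PetscInt    row = 0, col = 0;
       PetscScalar val = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
*/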
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscBool         roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if(!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316 	    value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) {
320               barray[jj]  = value[jj];
321             }
322             barray += bs;
323           }
324           barray -= bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
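/*
   Usage sketch for blocked insertion (not part of the original source;
   error checking abbreviated; assumes an already created MATMPIBAIJ matrix
   mat, with brow/bcol being global BLOCK indices chosen for illustration):
   MatSetValuesBlocked() takes block indices and v holds full bs x bs
   blocks, row-oriented by default.  For bs = 2:

       PetscInt    brow = 0, bcol = 0;
       PetscScalar blk[4] = {1.0, 2.0,
                             3.0, 4.0};
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);
*/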
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
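/*
   HASH() above is a multiplicative (Fibonacci) hash: multiply the key by
   the golden-ratio conjugate, keep the fractional part and scale it by the
   table size.  Worked example (illustration only, not part of the original
   source): for size = 100 and key = 12, tmp = 12*0.6180339887 = 7.4164...,
   the fractional part is 0.4164..., so the slot is (PetscInt)(100*0.4164...) = 41.
*/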
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
413         /* Look up into the Hash Table */
414         key = (row/bs)*Nbs+(col/bs)+1;
415         h1  = HASH(size,key,tmp);
416 
417 
418         idx = h1;
419 #if defined(PETSC_USE_DEBUG)
420         insert_ct++;
421         total_ct++;
422         if (HT[idx] != key) {
423           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
424           if (idx == size) {
425             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
426             if (idx == h1) {
427               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428             }
429           }
430         }
431 #else
432         if (HT[idx] != key) {
433           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
434           if (idx == size) {
435             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
436             if (idx == h1) {
437               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
438             }
439           }
440         }
441 #endif
442         /* A HASH table entry is found, so insert the values at the correct address */
443         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
444         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
445       }
446     } else {
447       if (!baij->donotstash) {
448         if (roworiented) {
449           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
450         } else {
451           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456 #if defined(PETSC_USE_DEBUG)
457   baij->ht_total_ct = total_ct;
458   baij->ht_insert_ct = insert_ct;
459 #endif
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscBool         roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up into the Hash Table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data --;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622  PetscFunctionReturn(0);
623 }
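/*
   Usage sketch (not part of the original source; error checking
   abbreviated; assumes an already assembled MATMPIBAIJ matrix mat):
   MatGetValues() may only request rows owned by the calling process, and,
   as the off-diagonal branch above shows, requested entries not present in
   the stored pattern are returned as 0.0.

       PetscInt    rstart,rend,row,col = 0;
       PetscScalar val;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       row  = rstart;
       ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/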
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatNorm_MPIBAIJ"
627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
628 {
629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
630   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
633   PetscReal      sum = 0.0;
634   MatScalar      *v;
635 
636   PetscFunctionBegin;
637   if (baij->size == 1) {
638     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
639   } else {
640     if (type == NORM_FROBENIUS) {
641       v = amat->a;
642       nz = amat->nz*bs2;
643       for (i=0; i<nz; i++) {
644 #if defined(PETSC_USE_COMPLEX)
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646 #else
647         sum += (*v)*(*v); v++;
648 #endif
649       }
650       v = bmat->a;
651       nz = bmat->nz*bs2;
652       for (i=0; i<nz; i++) {
653 #if defined(PETSC_USE_COMPLEX)
654         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
660       *nrm = PetscSqrtReal(*nrm);
661     } else if (type == NORM_1) { /* max column sum */
662       PetscReal *tmp,*tmp2;
663       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
664       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
665       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
666       v = amat->a; jj = amat->j;
667       for (i=0; i<amat->nz; i++) {
668         for (j=0; j<bs; j++){
669           col = bs*(cstart + *jj) + j; /* column index */
670           for (row=0; row<bs; row++){
671             tmp[col] += PetscAbsScalar(*v);  v++;
672           }
673         }
674         jj++;
675       }
676       v = bmat->a; jj = bmat->j;
677       for (i=0; i<bmat->nz; i++) {
678         for (j=0; j<bs; j++){
679           col = bs*garray[*jj] + j;
680           for (row=0; row<bs; row++){
681             tmp[col] += PetscAbsScalar(*v); v++;
682           }
683         }
684         jj++;
685       }
686       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
687       *nrm = 0.0;
688       for (j=0; j<mat->cmap->N; j++) {
689         if (tmp2[j] > *nrm) *nrm = tmp2[j];
690       }
691       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
692     } else if (type == NORM_INFINITY) { /* max row sum */
693       PetscReal *sums;
694       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
695       sum = 0.0;
696       for (j=0; j<amat->mbs; j++) {
697         for (row=0; row<bs; row++) sums[row] = 0.0;
698         v = amat->a + bs2*amat->i[j];
699         nz = amat->i[j+1]-amat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         v = bmat->a + bs2*bmat->i[j];
708         nz = bmat->i[j+1]-bmat->i[j];
709         for (i=0; i<nz; i++) {
710           for (col=0; col<bs; col++){
711             for (row=0; row<bs; row++){
712               sums[row] += PetscAbsScalar(*v); v++;
713             }
714           }
715         }
716         for (row=0; row<bs; row++){
717           if (sums[row] > sum) sum = sums[row];
718         }
719       }
720       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
721       ierr = PetscFree(sums);CHKERRQ(ierr);
722     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
723   }
724   PetscFunctionReturn(0);
725 }
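/*
   The three norms computed above (summary for reference, not part of the
   original source):
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2); local sums over A and B are
                     combined with MPI_SUM, then the square root is taken.
     NORM_1:         max_j sum_i |a_ij| (largest column sum); per-column
                     sums are combined with MPI_SUM before taking the maximum.
     NORM_INFINITY:  max_i sum_j |a_ij| (largest row sum); local maxima are
                     combined with MPI_MAX.
*/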
726 
727 /*
728   Creates and fills the hash table.
729   This table is created only once.
730   If new entries need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
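/*
   Sizing and probing sketch for the table built above (illustration only,
   not part of the original source): the table has ht_size = factor*nz
   slots and uses key = row*Nbs + col + 1 so that a stored value of 0 can
   mean "empty".  Collisions are resolved by linear probing: if two keys
   hash to slot h1, the second one is placed in the next free slot
   (h1+k)%ht_size for the smallest k > 0 whose slot is empty.
*/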
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash || mat->nooffprocentries) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
840   mat->insertmode = addv; /* in case this processor had no cache */
841 
842   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
843   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
854 {
855   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
856   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
857   PetscErrorCode ierr;
858   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
859   PetscInt       *row,*col;
860   PetscBool      r1,r2,r3,other_disassembled;
861   MatScalar      *val;
862   InsertMode     addv = mat->insertmode;
863   PetscMPIInt    n;
864 
865   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
866   PetscFunctionBegin;
867   if (!baij->donotstash && !mat->nooffprocentries) {
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         /* Now assemble all these values with a single function call */
878         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
879         i = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
883     /* Now process the block-stash. Since the values are stashed column-oriented,
884        set the roworiented flag to column oriented, and after MatSetValues()
885        restore the original flags */
886     r1 = baij->roworiented;
887     r2 = a->roworiented;
888     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
889     baij->roworiented = PETSC_FALSE;
890     a->roworiented    = PETSC_FALSE;
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
899         if (j < n) ncols = j-i;
900         else       ncols = n-i;
901         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
902         i = j;
903       }
904     }
905     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
906     baij->roworiented = r1;
907     a->roworiented    = r2;
908     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
909   }
910 
911   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
913 
914   /* determine if any processor has disassembled, if so we must
915      also disassemble ourselves, in order that we may reassemble. */
916   /*
917      if nonzero structure of submatrix B cannot change then we know that
918      no processor disassembled thus we can skip this stuff
919   */
920   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
921     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
922     if (mat->was_assembled && !other_disassembled) {
923       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
924     }
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
929   }
930   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
931   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
933 
934 #if defined(PETSC_USE_INFO)
935   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
936     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
948   baij->rowvalues = 0;
949   PetscFunctionReturn(0);
950 }
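/*
   Typical assembly sequence that drives the two routines above (usage
   sketch, not part of the original source; error checking abbreviated;
   M, N, bs, d_nz and o_nz stand for the global sizes, block size and
   per-block-row preallocation estimates):

       Mat mat;
       ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
       ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
       ierr = MatMPIBAIJSetPreallocation(mat,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
       (... calls to MatSetValues()/MatSetValuesBlocked() ...)
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/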
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscBool         iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscBool  isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054       Everyone has to participate in the call that draws the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1061       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084   PetscInt       message_count,flowcontrolcount;
1085 
1086   PetscFunctionBegin;
1087   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1088   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1089   nz   = bs2*(A->nz + B->nz);
1090   rlen = mat->rmap->n;
1091   if (!rank) {
1092     header[0] = MAT_FILE_CLASSID;
1093     header[1] = mat->rmap->N;
1094     header[2] = mat->cmap->N;
1095     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1097     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1098     /* get largest number of rows any processor has */
1099     range = mat->rmap->range;
1100     for (i=1; i<size; i++) {
1101       rlen = PetscMax(rlen,range[i+1] - range[i]);
1102     }
1103   } else {
1104     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1105   }
1106 
1107   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1108   /* compute lengths of each row  */
1109   count = 0;
1110   for (i=0; i<a->mbs; i++) {
1111     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1112   }
1113   /* store the row lengths to the file */
1114   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1115   if (!rank) {
1116     MPI_Status status;
1117     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1118     rlen  = (range[1] - range[0])/bs;
1119     for (i=0; i<rlen; i++) {
1120       for (j=0; j<bs; j++) {
1121         row_lens[i*bs+j] = bs*crow_lens[i];
1122       }
1123     }
1124     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     for (i=1; i<size; i++) {
1126       rlen = (range[i+1] - range[i])/bs;
1127       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1128       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1129       for (k=0; k<rlen; k++) {
1130 	for (j=0; j<bs; j++) {
1131 	  row_lens[k*bs+j] = bs*crow_lens[k];
1132 	}
1133       }
1134       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1135     }
1136     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1137     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1138   } else {
1139     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1140     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1141     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1142   }
1143   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1144 
1145   /* load up the local column indices. Indices are included for every row, not just one row per block row, since process 0 does
1146      not have the information needed to expand a block row into its individual rows. This requires more communication, but still
1147      no more than the communication needed for the nonzero values  */
1148   nzmax = nz; /*  space the largest processor needs */
1149   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1150   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1151   cnt  = 0;
1152   for (i=0; i<a->mbs; i++) {
1153     pcnt = cnt;
1154     for (j=B->i[i]; j<B->i[i+1]; j++) {
1155       if ( (col = garray[B->j[j]]) > cstart) break;
1156       for (l=0; l<bs; l++) {
1157 	column_indices[cnt++] = bs*col+l;
1158       }
1159     }
1160     for (k=A->i[i]; k<A->i[i+1]; k++) {
1161       for (l=0; l<bs; l++) {
1162         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1163       }
1164     }
1165     for (; j<B->i[i+1]; j++) {
1166       for (l=0; l<bs; l++) {
1167         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1168       }
1169     }
1170     len = cnt - pcnt;
1171     for (k=1; k<bs; k++) {
1172       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1173       cnt += len;
1174     }
1175   }
1176   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1177 
1178   /* store the columns to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     MPI_Status status;
1182     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1185       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1192     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1194     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1195   }
1196   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1197 
1198   /* load up the numerical values */
1199   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1200   cnt = 0;
1201   for (i=0; i<a->mbs; i++) {
1202     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ( garray[B->j[j]] > cstart) break;
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     for (k=A->i[i]; k<A->i[i+1]; k++) {
1213       for (l=0; l<bs; l++) {
1214         for (ll=0; ll<bs; ll++) {
1215           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1216         }
1217       }
1218       cnt += bs;
1219     }
1220     for (; j<B->i[i+1]; j++) {
1221       for (l=0; l<bs; l++) {
1222         for (ll=0; ll<bs; ll++) {
1223 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1224         }
1225       }
1226       cnt += bs;
1227     }
1228     cnt += (bs-1)*rlen;
1229   }
1230   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1231 
1232   /* store the column values to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_values);CHKERRQ(ierr);
1251 
1252   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1253   if (file) {
1254     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_MPIBAIJ"
1261 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscBool      iascii,isdraw,issocket,isbinary;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1270   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1271   if (iascii || isdraw || issocket) {
1272     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1273   } else if (isbinary) {
1274     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1275   } else {
1276     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1277   }
1278   PetscFunctionReturn(0);
1279 }
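/*
   Usage sketch (not part of the original source; error checking
   abbreviated): the binary branch above is reached by viewing the matrix
   through a binary viewer, e.g.

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(mat,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   ("mat.dat" is an arbitrary file name chosen for illustration.)
*/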
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1283 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1284 {
1285   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289 #if defined(PETSC_USE_LOG)
1290   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1291 #endif
1292   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1293   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1294   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1295   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1296 #if defined (PETSC_USE_CTABLE)
1297   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1298 #else
1299   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1300 #endif
1301   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1302   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1303   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1304   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1306   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1307   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1308   ierr = PetscFree(baij);CHKERRQ(ierr);
1309 
1310   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatMult_MPIBAIJ"
1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1324 {
1325   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327   PetscInt       nt;
1328 
1329   PetscFunctionBegin;
1330   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1331   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1332   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1333   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1334   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1336   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1344 {
1345   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1351   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1359 {
1360   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1361   PetscErrorCode ierr;
1362   PetscBool      merged;
1363 
1364   PetscFunctionBegin;
1365   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1366   /* do nondiagonal part */
1367   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1368   if (!merged) {
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* do local part */
1372     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1373     /* receive remote parts: note this assumes the values are not actually */
1374     /* inserted in yy until the next line */
1375     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1376   } else {
1377     /* do local part */
1378     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1379     /* send it on its way */
1380     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381     /* values actually were received in the Begin() but we need to call this nop */
1382     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   }
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1390 {
1391   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   /* do nondiagonal part */
1396   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1397   /* send it on its way */
1398   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1399   /* do local part */
1400   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1401   /* receive remote parts: note this assumes the values are not actually */
1402   /* inserted in zz until the next line, which is true for my implementation */
1403   /* but is perhaps not always true. */
1404   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
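
/*
   In matrix terms, the two transpose routines above compute

       yy = A_d^T xx + S^T (B_o^T xx)              (MatMultTranspose)
       zz = yy + A_d^T xx + S^T (B_o^T xx)         (MatMultTransposeAdd)

   where A_d = a->A is the diagonal block, B_o = a->B is the off-diagonal block in
   compressed (ghost) column numbering, and S^T is the reverse scatter a->Mvctx that
   adds the ghost contributions accumulated in a->lvec back to their owning processes.
   The reverse communication is started as soon as B_o^T xx is available so that it
   can overlap the local product with A_d^T.
*/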
1407 
1408 /*
1409   This only works correctly for square matrices where the subblock A->A is the
1410    diagonal block
1411 */
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1421   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
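
/*
   A worked example of the restriction above: with the usual PETSc layouts a square
   MPIBAIJ matrix gives each process matching row and column ownership ranges, so the
   global entry (i,i) of every owned row i lies in the diagonal block a->A at local
   position (i - rstart, i - rstart), and MatGetDiagonal() on a->A already produces
   the global diagonal.  For rectangular matrices the diagonal entries of some owned
   rows may live in a->B instead, which is why such matrices are rejected.
*/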
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatScale_MPIBAIJ"
1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1428 {
1429   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1430   PetscErrorCode ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1434   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1441 {
1442   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1443   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1444   PetscErrorCode ierr;
1445   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1446   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1447   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1448 
1449   PetscFunctionBegin;
1450   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1451   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1452   mat->getrowactive = PETSC_TRUE;
1453 
1454   if (!mat->rowvalues && (idx || v)) {
1455     /*
1456         allocate enough space to hold information from the longest row.
1457     */
1458     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1459     PetscInt     max = 1,mbs = mat->mbs,tmp;
1460     for (i=0; i<mbs; i++) {
1461       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1462       if (max < tmp) { max = tmp; }
1463     }
1464     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1465   }
1466   lrow = row - brstart;
1467 
1468   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1469   if (!v)   {pvA = 0; pvB = 0;}
1470   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1471   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1472   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1473   nztot = nzA + nzB;
1474 
1475   cmap  = mat->garray;
1476   if (v  || idx) {
1477     if (nztot) {
1478       /* Sort by increasing column numbers, assuming A and B already sorted */
1479       PetscInt imark = -1;
1480       if (v) {
1481         *v = v_p = mat->rowvalues;
1482         for (i=0; i<nzB; i++) {
1483           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1484           else break;
1485         }
1486         imark = i;
1487         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1488         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1489       }
1490       if (idx) {
1491         *idx = idx_p = mat->rowindices;
1492         if (imark > -1) {
1493           for (i=0; i<imark; i++) {
1494             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495           }
1496         } else {
1497           for (i=0; i<nzB; i++) {
1498             if (cmap[cworkB[i]/bs] < cstart)
1499               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1500             else break;
1501           }
1502           imark = i;
1503         }
1504         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1505         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1506       }
1507     } else {
1508       if (idx) *idx = 0;
1509       if (v)   *v   = 0;
1510     }
1511   }
1512   *nz = nztot;
1513   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1514   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
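
/*
   Illustrative caller of the routine above (hypothetical helper, not part of the
   library source): loop over the locally owned rows of an assembled MPIBAIJ matrix
   and inspect the merged, column-sorted row returned by MatGetRow().  The const
   qualifiers follow the public MatGetRow() prototype assumed for this PETSc vintage.
*/
#if 0
static PetscErrorCode user_scan_rows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          rstart,rend,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... use ncols, cols[] (global column indices) and vals[] here ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif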
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1523 
1524   PetscFunctionBegin;
1525   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1526   baij->getrowactive = PETSC_FALSE;
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1533 {
1534   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1539   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1546 {
1547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1548   Mat            A = a->A,B = a->B;
1549   PetscErrorCode ierr;
1550   PetscReal      isend[5],irecv[5];
1551 
1552   PetscFunctionBegin;
1553   info->block_size     = (PetscReal)matin->rmap->bs;
1554   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1555   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1556   isend[3] = info->memory;  isend[4] = info->mallocs;
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559   isend[3] += info->memory;  isend[4] += info->mallocs;
1560   if (flag == MAT_LOCAL) {
1561     info->nz_used      = isend[0];
1562     info->nz_allocated = isend[1];
1563     info->nz_unneeded  = isend[2];
1564     info->memory       = isend[3];
1565     info->mallocs      = isend[4];
1566   } else if (flag == MAT_GLOBAL_MAX) {
1567     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1575     info->nz_used      = irecv[0];
1576     info->nz_allocated = irecv[1];
1577     info->nz_unneeded  = irecv[2];
1578     info->memory       = irecv[3];
1579     info->mallocs      = irecv[4];
1580   } else {
1581     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   }
1583   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1584   info->fill_ratio_needed = 0;
1585   info->factor_mallocs    = 0;
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1592 {
1593   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   switch (op) {
1598   case MAT_NEW_NONZERO_LOCATIONS:
1599   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1600   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1601   case MAT_KEEP_NONZERO_PATTERN:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_ROW_ORIENTED:
1607     a->roworiented = flg;
1608     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1609     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1610     break;
1611   case MAT_NEW_DIAGONALS:
1612     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1613     break;
1614   case MAT_IGNORE_OFF_PROC_ENTRIES:
1615     a->donotstash = flg;
1616     break;
1617   case MAT_USE_HASH_TABLE:
1618     a->ht_flag = flg;
1619     break;
1620   case MAT_SYMMETRIC:
1621   case MAT_STRUCTURALLY_SYMMETRIC:
1622   case MAT_HERMITIAN:
1623   case MAT_SYMMETRY_ETERNAL:
1624     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1625     break;
1626   default:
1627     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1635 {
1636   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1637   Mat_SeqBAIJ    *Aloc;
1638   Mat            B;
1639   PetscErrorCode ierr;
1640   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1641   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1642   MatScalar      *a;
1643 
1644   PetscFunctionBegin;
1645   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1648     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1649     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1650     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1651   } else {
1652     B = *matout;
1653   }
1654 
1655   /* copy over the A part */
1656   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1657   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1659 
1660   for (i=0; i<mbs; i++) {
1661     rvals[0] = bs*(baij->rstartbs + i);
1662     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1663     for (j=ai[i]; j<ai[i+1]; j++) {
1664       col = (baij->cstartbs+aj[j])*bs;
1665       for (k=0; k<bs; k++) {
1666         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1667         col++; a += bs;
1668       }
1669     }
1670   }
1671   /* copy over the B part */
1672   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1673   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1674   for (i=0; i<mbs; i++) {
1675     rvals[0] = bs*(baij->rstartbs + i);
1676     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1677     for (j=ai[i]; j<ai[i+1]; j++) {
1678       col = baij->garray[aj[j]]*bs;
1679       for (k=0; k<bs; k++) {
1680         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1681         col++; a += bs;
1682       }
1683     }
1684   }
1685   ierr = PetscFree(rvals);CHKERRQ(ierr);
1686   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688 
1689   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1690     *matout = B;
1691   } else {
1692     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1700 {
1701   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1702   Mat            a = baij->A,b = baij->B;
1703   PetscErrorCode ierr;
1704   PetscInt       s1,s2,s3;
1705 
1706   PetscFunctionBegin;
1707   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1708   if (rr) {
1709     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1710     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1711     /* Overlap communication with computation. */
1712     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1713   }
1714   if (ll) {
1715     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1716     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1717     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1718   }
1719   /* scale  the diagonal block */
1720   /* scale the diagonal block */
1721 
1722   if (rr) {
1723     /* Do a scatter end and then right scale the off-diagonal block */
1724     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1726   }
1727 
1728   PetscFunctionReturn(0);
1729 }
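
/*
   The routine above applies mat <- diag(ll) * mat * diag(rr) blockwise: the diagonal
   block gets both scalings, baij->A <- diag(ll) baij->A diag(rr), while the
   off-diagonal block is left-scaled locally, baij->B <- diag(ll) baij->B, and then
   right-scaled by the ghosted copy of rr gathered into baij->lvec by the forward
   scatter, so the communication for rr overlaps the local scaling work.
*/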
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1734 {
1735   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1736   PetscErrorCode ierr;
1737   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1738   PetscInt       i,*owners = A->rmap->range;
1739   PetscInt       *nprocs,j,idx,nsends,row;
1740   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1741   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1742   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1743   MPI_Comm       comm = ((PetscObject)A)->comm;
1744   MPI_Request    *send_waits,*recv_waits;
1745   MPI_Status     recv_status,*send_status;
1746 #if defined(PETSC_DEBUG)
1747   PetscBool      found = PETSC_FALSE;
1748 #endif
1749 
1750   PetscFunctionBegin;
1751   /*  first count number of contributors to each processor */
1752   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1753   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1755   j = 0;
1756   for (i=0; i<N; i++) {
1757     if (lastidx > (idx = rows[i])) j = 0;
1758     lastidx = idx;
1759     for (; j<size; j++) {
1760       if (idx >= owners[j] && idx < owners[j+1]) {
1761         nprocs[2*j]++;
1762         nprocs[2*j+1] = 1;
1763         owner[i] = j;
1764 #if defined(PETSC_DEBUG)
1765         found = PETSC_TRUE;
1766 #endif
1767         break;
1768       }
1769     }
1770 #if defined(PETSC_DEBUG)
1771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1772     found = PETSC_FALSE;
1773 #endif
1774   }
1775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1776 
1777   if (A->nooffproczerorows) {
1778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1779     nrecvs = nsends;
1780     nmax   = N;
1781   } else {
1782     /* inform other processors of number of messages and max length*/
1783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1784   }
1785 
1786   /* post receives:   */
1787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1789   for (i=0; i<nrecvs; i++) {
1790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1791   }
1792 
1793   /* do sends:
1794      1) starts[i] gives the starting index in svalues for stuff going to
1795      the ith processor
1796   */
1797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1800   starts[0]  = 0;
1801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1802   for (i=0; i<N; i++) {
1803     svalues[starts[owner[i]]++] = rows[i];
1804   }
1805 
1806   starts[0] = 0;
1807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1808   count = 0;
1809   for (i=0; i<size; i++) {
1810     if (nprocs[2*i+1]) {
1811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1812     }
1813   }
1814   ierr = PetscFree(starts);CHKERRQ(ierr);
1815 
1816   base = owners[rank];
1817 
1818   /*  wait on receives */
1819   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1820   count  = nrecvs;
1821   slen = 0;
1822   while (count) {
1823     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1824     /* unpack receives into our local space */
1825     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1826     source[imdex]  = recv_status.MPI_SOURCE;
1827     lens[imdex]    = n;
1828     slen          += n;
1829     count--;
1830   }
1831   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1832 
1833   /* move the data into the send scatter */
1834   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1835   count = 0;
1836   for (i=0; i<nrecvs; i++) {
1837     values = rvalues + i*nmax;
1838     for (j=0; j<lens[i]; j++) {
1839       lrows[count++] = values[j] - base;
1840     }
1841   }
1842   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1843   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1844   ierr = PetscFree(owner);CHKERRQ(ierr);
1845   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1846 
1847   /* actually zap the local rows */
1848   /*
1849         Zero the required rows. If the "diagonal block" of the matrix
1850      is square and the user wishes to set the diagonal, we use separate
1851      code so that MatSetValues() is not called for each diagonal entry;
1852      that would allocate new memory each time, causing many mallocs and
1853      slowing things down.
1854   */
1855   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1856   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1857   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1858     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1859   } else if (diag != 0.0) {
1860     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1861     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1862        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1863     for (i=0; i<slen; i++) {
1864       row  = lrows[i] + rstart_bs;
1865       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1866     }
1867     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1869   } else {
1870     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1871   }
1872 
1873   ierr = PetscFree(lrows);CHKERRQ(ierr);
1874 
1875   /* wait on sends */
1876   if (nsends) {
1877     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1878     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1879     ierr = PetscFree(send_status);CHKERRQ(ierr);
1880   }
1881   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1882   ierr = PetscFree(svalues);CHKERRQ(ierr);
1883 
1884   PetscFunctionReturn(0);
1885 }
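
/*
   Illustrative caller (hypothetical helper, not part of the library source): zero the
   first locally owned point row of A and place 1.0 on its diagonal.  Rows may be
   listed by any process; the routine above routes them to their owners.  The call
   assumes the six-argument (rows, diag, x, b) form of MatZeroRows() implemented by
   this revision, with the optional vectors omitted.
*/
#if 0
static PetscErrorCode user_zero_first_row(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  row  = rstart;
  ierr = MatZeroRows(A,1,&row,1.0,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif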
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1889 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1890 {
1891   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatEqual_MPIBAIJ"
1903 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1904 {
1905   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1906   Mat            a,b,c,d;
1907   PetscBool      flg;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   a = matA->A; b = matA->B;
1912   c = matB->A; d = matB->B;
1913 
1914   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1915   if (flg) {
1916     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1917   }
1918   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatCopy_MPIBAIJ"
1924 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1925 {
1926   PetscErrorCode ierr;
1927   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1928   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1929 
1930   PetscFunctionBegin;
1931   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1932   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1933     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1934   } else {
1935     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1936     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1943 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1944 {
1945   PetscErrorCode ierr;
1946 
1947   PetscFunctionBegin;
1948   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1954 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1955 {
1956   PetscErrorCode ierr;
1957   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1958   PetscBLASInt   bnz,one=1;
1959   Mat_SeqBAIJ    *x,*y;
1960 
1961   PetscFunctionBegin;
1962   if (str == SAME_NONZERO_PATTERN) {
1963     PetscScalar alpha = a;
1964     x = (Mat_SeqBAIJ *)xx->A->data;
1965     y = (Mat_SeqBAIJ *)yy->A->data;
1966     bnz = PetscBLASIntCast(x->nz);
1967     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1968     x = (Mat_SeqBAIJ *)xx->B->data;
1969     y = (Mat_SeqBAIJ *)yy->B->data;
1970     bnz = PetscBLASIntCast(x->nz);
1971     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1972   } else {
1973     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1974   }
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1980 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1981 {
1982   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1983   PetscInt rbs,cbs;
1984   PetscErrorCode ierr;
1985 
1986   PetscFunctionBegin;
1987   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1988   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1989   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1990   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1991   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1992   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1998 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1999 {
2000   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2001   PetscErrorCode ierr;
2002 
2003   PetscFunctionBegin;
2004   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2005   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2011 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2012 {
2013   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2014   PetscErrorCode ierr;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2018   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2024 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2025 {
2026   PetscErrorCode ierr;
2027   IS             iscol_local;
2028   PetscInt       csize;
2029 
2030   PetscFunctionBegin;
2031   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2032   if (call == MAT_REUSE_MATRIX) {
2033     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2034     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2035   } else {
2036     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2037   }
2038   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2039   if (call == MAT_INITIAL_MATRIX) {
2040     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2041     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2048 /*
2049     Not great since it makes two copies of the submatrix: first a SeqBAIJ
2050   in local, and then the end result obtained by concatenating the local matrices.
2051   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2052 */
2053 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2054 {
2055   PetscErrorCode ierr;
2056   PetscMPIInt    rank,size;
2057   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2058   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2059   Mat            *local,M,Mreuse;
2060   MatScalar      *vwork,*aa;
2061   MPI_Comm       comm = ((PetscObject)mat)->comm;
2062   Mat_SeqBAIJ    *aij;
2063 
2064 
2065   PetscFunctionBegin;
2066   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2067   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2068 
2069   if (call ==  MAT_REUSE_MATRIX) {
2070     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2071     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2072     local = &Mreuse;
2073     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2074   } else {
2075     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2076     Mreuse = *local;
2077     ierr   = PetscFree(local);CHKERRQ(ierr);
2078   }
2079 
2080   /*
2081       m - number of local rows
2082       n - number of columns (same on all processors)
2083       rstart - first row in new global matrix generated
2084   */
2085   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2086   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2087   m    = m/bs;
2088   n    = n/bs;
2089 
2090   if (call == MAT_INITIAL_MATRIX) {
2091     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2092     ii  = aij->i;
2093     jj  = aij->j;
2094 
2095     /*
2096         Determine the number of non-zeros in the diagonal and off-diagonal
2097         portions of the matrix in order to do correct preallocation
2098     */
2099 
2100     /* first get start and end of "diagonal" columns */
2101     if (csize == PETSC_DECIDE) {
2102       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2103       if (mglobal == n*bs) { /* square matrix */
2104         nlocal = m;
2105       } else {
2106         nlocal = n/size + ((n % size) > rank);
2107       }
2108     } else {
2109       nlocal = csize/bs;
2110     }
2111     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2112     rstart = rend - nlocal;
2113     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2114 
2115     /* next, compute all the lengths */
2116     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2117     olens = dlens + m;
2118     for (i=0; i<m; i++) {
2119       jend = ii[i+1] - ii[i];
2120       olen = 0;
2121       dlen = 0;
2122       for (j=0; j<jend; j++) {
2123         if (*jj < rstart || *jj >= rend) olen++;
2124         else dlen++;
2125         jj++;
2126       }
2127       olens[i] = olen;
2128       dlens[i] = dlen;
2129     }
2130     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2131     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2132     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2133     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2134     ierr = PetscFree(dlens);CHKERRQ(ierr);
2135   } else {
2136     PetscInt ml,nl;
2137 
2138     M = *newmat;
2139     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2140     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2141     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2142     /*
2143          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2144        rather than the slower MatSetValues().
2145     */
2146     M->was_assembled = PETSC_TRUE;
2147     M->assembled     = PETSC_FALSE;
2148   }
2149   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2150   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2151   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2152   ii  = aij->i;
2153   jj  = aij->j;
2154   aa  = aij->a;
2155   for (i=0; i<m; i++) {
2156     row   = rstart/bs + i;
2157     nz    = ii[i+1] - ii[i];
2158     cwork = jj;     jj += nz;
2159     vwork = aa;     aa += nz;
2160     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2161   }
2162 
2163   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165   *newmat = M;
2166 
2167   /* save submatrix used in processor for next request */
2168   if (call ==  MAT_INITIAL_MATRIX) {
2169     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2170     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2171   }
2172 
2173   PetscFunctionReturn(0);
2174 }
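
/*
   Illustrative caller (hypothetical helper, not part of the library source): extract
   the square submatrix defined by each process's owned rows and the matching columns.
   A second call with MAT_REUSE_MATRIX reuses the "ISAllGather" and "SubMatrix"
   objects composed above instead of rebuilding them.
*/
#if 0
static PetscErrorCode user_extract(Mat A,Mat *sub)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif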
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatPermute_MPIBAIJ"
2178 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2179 {
2180   MPI_Comm       comm,pcomm;
2181   PetscInt       first,local_size,nrows;
2182   const PetscInt *rows;
2183   PetscMPIInt    size;
2184   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2189   /* make a collective version of 'rowp' */
2190   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2191   if (pcomm==comm) {
2192     crowp = rowp;
2193   } else {
2194     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2195     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2196     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2197     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2198   }
2199   /* collect the global row permutation and invert it */
2200   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2201   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2202   if (pcomm!=comm) {
2203     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2204   }
2205   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2206   /* get the local target indices */
2207   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2208   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2209   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2210   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2211   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2212   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2213   /* the column permutation is so much easier;
2214      make a local version of 'colp' and invert it */
2215   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2216   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2217   if (size==1) {
2218     lcolp = colp;
2219   } else {
2220     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2221     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2222     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2223   }
2224   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2225   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2226   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2227   if (size>1) {
2228     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2229     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2230   }
2231   /* now we just get the submatrix */
2232   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2233   /* clean up */
2234   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2235   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 #undef __FUNCT__
2240 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2241 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2242 {
2243   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2244   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2245 
2246   PetscFunctionBegin;
2247   if (nghosts) { *nghosts = B->nbs;}
2248   if (ghosts) {*ghosts = baij->garray;}
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2256 /*
2257     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2258 */
2259 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2260 {
2261   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2262   PetscErrorCode        ierr;
2263   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2264   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2265   const PetscInt        *is;
2266   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2267   PetscInt              *rowhit,M,cstart,cend,colb;
2268   PetscInt              *columnsforrow,l;
2269   IS                    *isa;
2270   PetscBool              done,flg;
2271   ISLocalToGlobalMapping map = mat->bmapping;
2272   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2273 
2274   PetscFunctionBegin;
2275   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2276   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2277 
2278   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2279   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2280   M                = mat->rmap->n/bs;
2281   cstart           = mat->cmap->rstart/bs;
2282   cend             = mat->cmap->rend/bs;
2283   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2284   c->N             = mat->cmap->N/bs;
2285   c->m             = mat->rmap->n/bs;
2286   c->rstart        = mat->rmap->rstart/bs;
2287 
2288   c->ncolors       = nis;
2289   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2290   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2291   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2292   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2293   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2294   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2295 
2296   /* Allow access to data structures of local part of matrix */
2297   if (!baij->colmap) {
2298     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2299   }
2300   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2301   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2302 
2303   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2304   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2305 
2306   for (i=0; i<nis; i++) {
2307     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2308     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2309     c->ncolumns[i] = n;
2310     if (n) {
2311       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2312       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2313       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2314     } else {
2315       c->columns[i]  = 0;
2316     }
2317 
2318     if (ctype == IS_COLORING_GLOBAL){
2319       /* Determine the total (parallel) number of columns of this color */
2320       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2321       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2322 
2323       nn   = PetscMPIIntCast(n);
2324       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2325       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2326       if (!nctot) {
2327         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2328       }
2329 
2330       disp[0] = 0;
2331       for (j=1; j<size; j++) {
2332         disp[j] = disp[j-1] + ncolsonproc[j-1];
2333       }
2334 
2335       /* Get complete list of columns for color on each processor */
2336       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2337       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2338       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2339     } else if (ctype == IS_COLORING_GHOSTED){
2340       /* Determine local number of columns of this color on this process, including ghost points */
2341       nctot = n;
2342       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2343       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2344     } else {
2345       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2346     }
2347 
2348     /*
2349        Mark all rows affected by these columns
2350     */
2351     /* Temporary option to allow for debugging/testing */
2352     flg  = PETSC_FALSE;
2353     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2354     if (!flg) {/*-----------------------------------------------------------------------------*/
2355       /* crude, fast version */
2356       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2357       /* loop over columns*/
2358       for (j=0; j<nctot; j++) {
2359         if (ctype == IS_COLORING_GHOSTED) {
2360           col = ltog[cols[j]];
2361         } else {
2362           col  = cols[j];
2363         }
2364         if (col >= cstart && col < cend) {
2365           /* column is in diagonal block of matrix */
2366           rows = A_cj + A_ci[col-cstart];
2367           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2368         } else {
2369 #if defined (PETSC_USE_CTABLE)
2370           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2371           colb--;
2372 #else
2373           colb = baij->colmap[col] - 1;
2374 #endif
2375           if (colb == -1) {
2376             m = 0;
2377           } else {
2378             colb = colb/bs;
2379             rows = B_cj + B_ci[colb];
2380             m    = B_ci[colb+1] - B_ci[colb];
2381           }
2382         }
2383         /* loop over the rows of this column, marking them in rowhit */
2384         for (k=0; k<m; k++) {
2385           rowhit[*rows++] = col + 1;
2386         }
2387       }
2388 
2389       /* count the number of hits */
2390       nrows = 0;
2391       for (j=0; j<M; j++) {
2392         if (rowhit[j]) nrows++;
2393       }
2394       c->nrows[i]         = nrows;
2395       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2396       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2397       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2398       nrows = 0;
2399       for (j=0; j<M; j++) {
2400         if (rowhit[j]) {
2401           c->rows[i][nrows]           = j;
2402           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2403           nrows++;
2404         }
2405       }
2406     } else {/*-------------------------------------------------------------------------------*/
2407       /* slow version, using rowhit as a linked list */
2408       PetscInt currentcol,fm,mfm;
2409       rowhit[M] = M;
2410       nrows     = 0;
2411       /* loop over columns*/
2412       for (j=0; j<nctot; j++) {
2413         if (ctype == IS_COLORING_GHOSTED) {
2414           col = ltog[cols[j]];
2415         } else {
2416           col  = cols[j];
2417         }
2418         if (col >= cstart && col < cend) {
2419           /* column is in diagonal block of matrix */
2420           rows = A_cj + A_ci[col-cstart];
2421           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2422         } else {
2423 #if defined (PETSC_USE_CTABLE)
2424           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2425           colb --;
2426 #else
2427           colb = baij->colmap[col] - 1;
2428 #endif
2429           if (colb == -1) {
2430             m = 0;
2431           } else {
2432             colb = colb/bs;
2433             rows = B_cj + B_ci[colb];
2434             m    = B_ci[colb+1] - B_ci[colb];
2435           }
2436         }
2437 
2438         /* loop over the rows of this column, adding them to the rowhit linked list */
2439         fm    = M; /* fm points to first entry in linked list */
2440         for (k=0; k<m; k++) {
2441           currentcol = *rows++;
2442           /* is it already in the list? */
2443           do {
2444             mfm  = fm;
2445             fm   = rowhit[fm];
2446           } while (fm < currentcol);
2447           /* not in list so add it */
2448           if (fm != currentcol) {
2449             nrows++;
2450             columnsforrow[currentcol] = col;
2451             /* next three lines insert new entry into linked list */
2452             rowhit[mfm]               = currentcol;
2453             rowhit[currentcol]        = fm;
2454             fm                        = currentcol;
2455             /* fm points to present position in list since we know the columns are sorted */
2456           } else {
2457             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2458           }
2459         }
2460       }
2461       c->nrows[i]         = nrows;
2462       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2463       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2464       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2465       /* now store the linked list of rows into c->rows[i] */
2466       nrows = 0;
2467       fm    = rowhit[M];
2468       do {
2469         c->rows[i][nrows]            = fm;
2470         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2471         fm                           = rowhit[fm];
2472       } while (fm < M);
2473     } /* ---------------------------------------------------------------------------------------*/
2474     ierr = PetscFree(cols);CHKERRQ(ierr);
2475   }
2476 
2477   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2478   /*
2479        vscale will contain the "diagonal" on-process scalings followed by the off-process ones
2480   */
2481   if (ctype == IS_COLORING_GLOBAL) {
2482     PetscInt *garray;
2483     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2484     for (i=0; i<baij->B->cmap->n/bs; i++) {
2485       for (j=0; j<bs; j++) {
2486         garray[i*bs+j] = bs*baij->garray[i]+j;
2487       }
2488     }
2489     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2490     ierr = PetscFree(garray);CHKERRQ(ierr);
2491     CHKMEMQ;
2492     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2493     for (k=0; k<c->ncolors; k++) {
2494       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2495       for (l=0; l<c->nrows[k]; l++) {
2496         col = c->columnsforrow[k][l];
2497         if (col >= cstart && col < cend) {
2498           /* column is in diagonal block of matrix */
2499           colb = col - cstart;
2500         } else {
2501           /* column  is in "off-processor" part */
2502 #if defined (PETSC_USE_CTABLE)
2503           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2504           colb --;
2505 #else
2506           colb = baij->colmap[col] - 1;
2507 #endif
2508           colb = colb/bs;
2509           colb += cend - cstart;
2510         }
2511         c->vscaleforrow[k][l] = colb;
2512       }
2513     }
2514   } else if (ctype == IS_COLORING_GHOSTED) {
2515     /* Get gtol mapping */
2516     PetscInt N = mat->cmap->N, *gtol;
2517     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2518     for (i=0; i<N; i++) gtol[i] = -1;
2519     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2520 
2521     c->vscale = 0; /* will be created in MatFDColoringApply() */
2522     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2523     for (k=0; k<c->ncolors; k++) {
2524       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2525       for (l=0; l<c->nrows[k]; l++) {
2526         col = c->columnsforrow[k][l];      /* global column index */
2527         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2528       }
2529     }
2530     ierr = PetscFree(gtol);CHKERRQ(ierr);
2531   }
2532   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2533 
2534   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2535   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2536   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2537   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2538   CHKMEMQ;
2539   PetscFunctionReturn(0);
2540 }
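
/*
   Illustrative caller (hypothetical helper, not part of the library source): build a
   finite-difference coloring context for an assembled MPIBAIJ Jacobian, assuming the
   MatGetColoring()/MATCOLORING_SL interface of this PETSc vintage.
*/
#if 0
static PetscErrorCode user_create_coloring(Mat J,MatFDColoring *fdc)
{
  PetscErrorCode ierr;
  ISColoring     iscoloring;

  PetscFunctionBegin;
  ierr = MatGetColoring(J,MATCOLORING_SL,&iscoloring);CHKERRQ(ierr);
  ierr = MatFDColoringCreate(J,iscoloring,fdc);CHKERRQ(ierr); /* dispatches to the routine above */
  ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif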
2541 
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2544 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2545 {
2546   Mat            B;
2547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2548   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2549   Mat_SeqAIJ     *b;
2550   PetscErrorCode ierr;
2551   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2552   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2553   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2554 
2555   PetscFunctionBegin;
2556   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2557   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2558 
2559   /* ----------------------------------------------------------------
2560      Tell every processor the number of nonzeros per row
2561   */
2562   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2563   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2564     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2565   }
2566   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2567   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2568   displs     = recvcounts + size;
2569   for (i=0; i<size; i++) {
2570     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2571     displs[i]     = A->rmap->range[i]/bs;
2572   }
2573 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2574   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2575 #else
2576   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2577 #endif
2578   /* ---------------------------------------------------------------
2579      Create the sequential AIJ matrix that will hold, on every process, the block nonzero structure of the whole matrix
2580   */
2581   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2582   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2583   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2584   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2585   b = (Mat_SeqAIJ *)B->data;
2586 
2587   /*--------------------------------------------------------------------
2588     Copy my part of matrix column indices over
2589   */
2590   sendcount  = ad->nz + bd->nz;
2591   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2592   a_jsendbuf = ad->j;
2593   b_jsendbuf = bd->j;
2594   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2595   cnt        = 0;
2596   for (i=0; i<n; i++) {
2597 
2598     /* put in lower diagonal portion */
2599     m = bd->i[i+1] - bd->i[i];
2600     while (m > 0) {
2601       /* is it above diagonal (in bd (compressed) numbering) */
2602       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2603       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2604       m--;
2605     }
2606 
2607     /* put in diagonal portion */
2608     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2609       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2610     }
2611 
2612     /* put in upper diagonal portion */
2613     while (m-- > 0) {
2614       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2615     }
2616   }
2617   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2618 
2619   /*--------------------------------------------------------------------
2620     Gather all column indices to all processors
2621   */
2622   for (i=0; i<size; i++) {
2623     recvcounts[i] = 0;
2624     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2625       recvcounts[i] += lens[j];
2626     }
2627   }
2628   displs[0]  = 0;
2629   for (i=1; i<size; i++) {
2630     displs[i] = displs[i-1] + recvcounts[i-1];
2631   }
2632 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2633   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2634 #else
2635   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2636 #endif
2637   /*--------------------------------------------------------------------
2638     Assemble the matrix into usable form (note numerical values not yet set)
2639   */
2640   /* set the b->ilen (length of each row) values */
2641   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2642   /* set the b->i indices */
2643   b->i[0] = 0;
2644   for (i=1; i<=A->rmap->N/bs; i++) {
2645     b->i[i] = b->i[i-1] + lens[i-1];
2646   }
2647   ierr = PetscFree(lens);CHKERRQ(ierr);
2648   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2651 
2652   if (A->symmetric){
2653     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2654   } else if (A->hermitian) {
2655     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2656   } else if (A->structurally_symmetric) {
2657     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2658   }
2659   *newmat = B;
2660   PetscFunctionReturn(0);
2661 }
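
/*
   A small worked example of the recvcounts/displs construction used twice above: with
   three processes owning 2, 3 and 1 block rows, the first MPI_Allgatherv() uses
   recvcounts = {2,3,1} and displs = {0,2,5}, so each process's row lengths land
   contiguously in lens[]; the second gather replaces the counts by the summed row
   lengths of each process and rebuilds displs the same way to place the column
   indices in b->j.
*/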
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatSOR_MPIBAIJ"
2665 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2666 {
2667   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2668   PetscErrorCode ierr;
2669   Vec            bb1 = 0;
2670 
2671   PetscFunctionBegin;
2672   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2673     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2674   }
2675 
2676   if (flag == SOR_APPLY_UPPER) {
2677     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2678     PetscFunctionReturn(0);
2679   }
2680 
2681   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2682     if (flag & SOR_ZERO_INITIAL_GUESS) {
2683       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2684       its--;
2685     }
2686 
2687     while (its--) {
2688       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690 
2691       /* update rhs: bb1 = bb - B*x */
2692       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2693       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2694 
2695       /* local sweep */
2696       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2697     }
2698   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2699     if (flag & SOR_ZERO_INITIAL_GUESS) {
2700       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2701       its--;
2702     }
2703     while (its--) {
2704       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2705       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706 
2707       /* update rhs: bb1 = bb - B*x */
2708       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2709       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2710 
2711       /* local sweep */
2712       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2713     }
2714   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2715     if (flag & SOR_ZERO_INITIAL_GUESS) {
2716       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2717       its--;
2718     }
2719     while (its--) {
2720       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2721       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722 
2723       /* update rhs: bb1 = bb - B*x */
2724       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2725       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2726 
2727       /* local sweep */
2728       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2729     }
2730   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR type not supported");
2731 
2732   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2737 
2738 
2739 /* -------------------------------------------------------------------*/
2740 static struct _MatOps MatOps_Values = {
2741        MatSetValues_MPIBAIJ,
2742        MatGetRow_MPIBAIJ,
2743        MatRestoreRow_MPIBAIJ,
2744        MatMult_MPIBAIJ,
2745 /* 4*/ MatMultAdd_MPIBAIJ,
2746        MatMultTranspose_MPIBAIJ,
2747        MatMultTransposeAdd_MPIBAIJ,
2748        0,
2749        0,
2750        0,
2751 /*10*/ 0,
2752        0,
2753        0,
2754        MatSOR_MPIBAIJ,
2755        MatTranspose_MPIBAIJ,
2756 /*15*/ MatGetInfo_MPIBAIJ,
2757        MatEqual_MPIBAIJ,
2758        MatGetDiagonal_MPIBAIJ,
2759        MatDiagonalScale_MPIBAIJ,
2760        MatNorm_MPIBAIJ,
2761 /*20*/ MatAssemblyBegin_MPIBAIJ,
2762        MatAssemblyEnd_MPIBAIJ,
2763        MatSetOption_MPIBAIJ,
2764        MatZeroEntries_MPIBAIJ,
2765 /*24*/ MatZeroRows_MPIBAIJ,
2766        0,
2767        0,
2768        0,
2769        0,
2770 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*34*/ MatDuplicate_MPIBAIJ,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*39*/ MatAXPY_MPIBAIJ,
2781        MatGetSubMatrices_MPIBAIJ,
2782        MatIncreaseOverlap_MPIBAIJ,
2783        MatGetValues_MPIBAIJ,
2784        MatCopy_MPIBAIJ,
2785 /*44*/ 0,
2786        MatScale_MPIBAIJ,
2787        0,
2788        0,
2789        0,
2790 /*49*/ MatSetBlockSize_MPIBAIJ,
2791        0,
2792        0,
2793        0,
2794        0,
2795 /*54*/ MatFDColoringCreate_MPIBAIJ,
2796        0,
2797        MatSetUnfactored_MPIBAIJ,
2798        MatPermute_MPIBAIJ,
2799        MatSetValuesBlocked_MPIBAIJ,
2800 /*59*/ MatGetSubMatrix_MPIBAIJ,
2801        MatDestroy_MPIBAIJ,
2802        MatView_MPIBAIJ,
2803        0,
2804        0,
2805 /*64*/ 0,
2806        0,
2807        0,
2808        0,
2809        0,
2810 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2811        0,
2812        0,
2813        0,
2814        0,
2815 /*74*/ 0,
2816        MatFDColoringApply_BAIJ,
2817        0,
2818        0,
2819        0,
2820 /*79*/ 0,
2821        0,
2822        0,
2823        0,
2824        MatLoad_MPIBAIJ,
2825 /*84*/ 0,
2826        0,
2827        0,
2828        0,
2829        0,
2830 /*89*/ 0,
2831        0,
2832        0,
2833        0,
2834        0,
2835 /*94*/ 0,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*99*/ 0,
2841        0,
2842        0,
2843        0,
2844        0,
2845 /*104*/0,
2846        MatRealPart_MPIBAIJ,
2847        MatImaginaryPart_MPIBAIJ,
2848        0,
2849        0,
2850 /*109*/0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2856        0,
2857        MatGetGhosts_MPIBAIJ,
2858        0,
2859        0,
2860 /*119*/0,
2861        0,
2862        0,
2863        0
2864 };
2865 
2866 EXTERN_C_BEGIN
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2869 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2870 {
2871   PetscFunctionBegin;
2872   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2873   *iscopy = PETSC_FALSE;
2874   PetscFunctionReturn(0);
2875 }
2876 EXTERN_C_END
2877 
2878 EXTERN_C_BEGIN
2879 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2880 EXTERN_C_END
2881 
2882 EXTERN_C_BEGIN
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2885 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2886 {
2887   PetscInt       m,rstart,cstart,cend;
2888   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2889   const PetscInt *JJ=0;
2890   PetscScalar    *values=0;
2891   PetscErrorCode ierr;
2892 
2893   PetscFunctionBegin;
2894 
2895   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2896   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2897   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2898   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2899   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2900   m      = B->rmap->n/bs;
2901   rstart = B->rmap->rstart/bs;
2902   cstart = B->cmap->rstart/bs;
2903   cend   = B->cmap->rend/bs;
2904 
2905   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2906   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2907   for (i=0; i<m; i++) {
2908     nz = ii[i+1] - ii[i];
2909     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2910     nz_max = PetscMax(nz_max,nz);
2911     JJ  = jj + ii[i];
2912     for (j=0; j<nz; j++) {
2913       if (*JJ >= cstart) break;
2914       JJ++;
2915     }
2916     d = 0;
2917     for (; j<nz; j++) {
2918       if (*JJ++ >= cend) break;
2919       d++;
2920     }
2921     d_nnz[i] = d;
2922     o_nnz[i] = nz - d;
2923   }
2924   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2925   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2926 
2927   values = (PetscScalar*)V;
2928   if (!values) {
2929     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2930     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2931   }
2932   for (i=0; i<m; i++) {
2933     PetscInt          row    = i + rstart;
2934     PetscInt          ncols  = ii[i+1] - ii[i];
2935     const PetscInt    *icols = jj + ii[i];
2936     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2937     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2938   }
2939 
2940   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2941   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2942   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943 
2944   PetscFunctionReturn(0);
2945 }
2946 EXTERN_C_END
2947 
2948 #undef __FUNCT__
2949 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2950 /*@C
2951    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2952    (block compressed row), using block-CSR data provided by the caller.
2953 
2954    Collective on MPI_Comm
2955 
2956    Input Parameters:
2957 +  A - the matrix
2958 .  bs - the block size
2959 .  i - the indices into j for the start of each local block row (starts with zero)
2960 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2961 -  v - optional values in the matrix
2962 
2963    Level: developer
2964 
2965 .keywords: matrix, aij, compressed row, sparse, parallel
2966 
2967 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2968 @*/
2969 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2970 {
2971   PetscErrorCode ierr;
2972 
2973   PetscFunctionBegin;
2974   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2975   PetscFunctionReturn(0);
2976 }
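/*
   Illustrative sketch (not part of the PETSc source): calling
   MatMPIBAIJSetPreallocationCSR() with made-up block-CSR data.  Here ii[] and
   jj[] index BLOCKS (not scalar entries), jj[] holds global block columns
   sorted within each block row, and the optional value array may be PETSC_NULL;
   "comm" and "ierr" are assumed to be declared by the caller.

     Mat      A;
     PetscInt bs = 2;
     PetscInt ii[] = {0,1,3};     // 2 local block rows containing 1 and 2 blocks
     PetscInt jj[] = {0,0,1};     // global block column of each block

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);   // v is optional; the routine also assembles A
*/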
2977 
2978 EXTERN_C_BEGIN
2979 #undef __FUNCT__
2980 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2981 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2982 {
2983   Mat_MPIBAIJ    *b;
2984   PetscErrorCode ierr;
2985   PetscInt       i, newbs = PetscAbs(bs);
2986 
2987   PetscFunctionBegin;
2988   if (bs < 0) {
2989     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2990       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2991     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2992     bs   = PetscAbs(bs);
2993   }
2994   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2995   bs = newbs;
2996 
2997 
2998   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2999   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3000   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3001   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3002   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3003 
3004   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3005   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3006   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3007   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3008 
3009   if (d_nnz) {
3010     for (i=0; i<B->rmap->n/bs; i++) {
3011       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3012     }
3013   }
3014   if (o_nnz) {
3015     for (i=0; i<B->rmap->n/bs; i++) {
3016       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3017     }
3018   }
3019 
3020   b = (Mat_MPIBAIJ*)B->data;
3021   b->bs2 = bs*bs;
3022   b->mbs = B->rmap->n/bs;
3023   b->nbs = B->cmap->n/bs;
3024   b->Mbs = B->rmap->N/bs;
3025   b->Nbs = B->cmap->N/bs;
3026 
3027   for (i=0; i<=b->size; i++) {
3028     b->rangebs[i] = B->rmap->range[i]/bs;
3029   }
3030   b->rstartbs = B->rmap->rstart/bs;
3031   b->rendbs   = B->rmap->rend/bs;
3032   b->cstartbs = B->cmap->rstart/bs;
3033   b->cendbs   = B->cmap->rend/bs;
3034 
3035   if (!B->preallocated) {
3036     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3037     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3038     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3039     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3040     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3041     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3042     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3043     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3044     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3045   }
3046 
3047   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3048   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3049   B->preallocated = PETSC_TRUE;
3050   PetscFunctionReturn(0);
3051 }
3052 EXTERN_C_END
3053 
3054 EXTERN_C_BEGIN
3055 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3056 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3057 EXTERN_C_END
3058 
3059 
3060 EXTERN_C_BEGIN
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3063 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3064 {
3065   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3066   PetscErrorCode ierr;
3067   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3068   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3069   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3070 
3071   PetscFunctionBegin;
3072   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3073   ii[0] = 0;
3074   CHKMEMQ;
3075   for (i=0; i<M; i++) {
3076     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3077     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3078     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3079     /* remove one from the count if this row contains its diagonal entry, since the diagonal is dropped below */
3080     for (j=id[i]; j<id[i+1]; j++) {
3081       if (jd[j] == i) {ii[i+1]--;break;}
3082     }
3083   CHKMEMQ;
3084   }
3085   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3086   cnt = 0;
3087   for (i=0; i<M; i++) {
3088     for (j=io[i]; j<io[i+1]; j++) {
3089       if (garray[jo[j]] > rstart) break;
3090       jj[cnt++] = garray[jo[j]];
3091   CHKMEMQ;
3092     }
3093     for (k=id[i]; k<id[i+1]; k++) {
3094       if (jd[k] != i) {
3095         jj[cnt++] = rstart + jd[k];
3096   CHKMEMQ;
3097       }
3098     }
3099     for (;j<io[i+1]; j++) {
3100       jj[cnt++] = garray[jo[j]];
3101   CHKMEMQ;
3102     }
3103   }
3104   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3105   PetscFunctionReturn(0);
3106 }
3107 EXTERN_C_END
3108 
3109 EXTERN_C_BEGIN
3110 #if defined(PETSC_HAVE_MUMPS)
3111 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3112 #endif
3113 EXTERN_C_END
3114 
3115 /*MC
3116    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3117 
3118    Options Database Keys:
3119 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3120 . -mat_block_size <bs> - set the blocksize used to store the matrix
3121 - -mat_use_hash_table <fact> - use a hash table to save memory while constructing the matrix; fact controls the table size
3122 
3123   Level: beginner
3124 
3125 .seealso: MatCreateMPIBAIJ
3126 M*/
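/*
   Illustrative sketch (not part of the PETSc source): selecting this type from
   the options database.  "comm", "M", "N" and "ierr" are assumed to be provided
   by the caller; the type and block size are then chosen at run time, e.g. with
   -mat_type mpibaij -mat_block_size 3.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/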
3127 
3128 EXTERN_C_BEGIN
3129 #undef __FUNCT__
3130 #define __FUNCT__ "MatCreate_MPIBAIJ"
3131 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
3132 {
3133   Mat_MPIBAIJ    *b;
3134   PetscErrorCode ierr;
3135   PetscBool      flg;
3136 
3137   PetscFunctionBegin;
3138   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3139   B->data = (void*)b;
3140 
3141 
3142   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3143   B->mapping    = 0;
3144   B->assembled  = PETSC_FALSE;
3145 
3146   B->insertmode = NOT_SET_VALUES;
3147   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3148   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3149 
3150   /* build local table of row and column ownerships */
3151   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3152 
3153   /* build cache for off array entries formed */
3154   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3155   b->donotstash  = PETSC_FALSE;
3156   b->colmap      = PETSC_NULL;
3157   b->garray      = PETSC_NULL;
3158   b->roworiented = PETSC_TRUE;
3159 
3160   /* stuff used in block assembly */
3161   b->barray       = 0;
3162 
3163   /* stuff used for matrix vector multiply */
3164   b->lvec         = 0;
3165   b->Mvctx        = 0;
3166 
3167   /* stuff for MatGetRow() */
3168   b->rowindices   = 0;
3169   b->rowvalues    = 0;
3170   b->getrowactive = PETSC_FALSE;
3171 
3172   /* hash table stuff */
3173   b->ht           = 0;
3174   b->hd           = 0;
3175   b->ht_size      = 0;
3176   b->ht_flag      = PETSC_FALSE;
3177   b->ht_fact      = 0;
3178   b->ht_total_ct  = 0;
3179   b->ht_insert_ct = 0;
3180 
3181   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3182     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3183     if (flg) {
3184       PetscReal fact = 1.39;
3185       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3186       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3187       if (fact <= 1.0) fact = 1.39;
3188       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3189       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3190     }
3191   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3192 
3193 #if defined(PETSC_HAVE_MUMPS)
3194   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3195 #endif
3196   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3197                                      "MatConvert_MPIBAIJ_MPIAdj",
3198                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3199   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3200                                      "MatStoreValues_MPIBAIJ",
3201                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3202   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3203                                      "MatRetrieveValues_MPIBAIJ",
3204                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3205   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3206                                      "MatGetDiagonalBlock_MPIBAIJ",
3207                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3208   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3209                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3210                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3211   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3212 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3213 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3214   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3215                                      "MatDiagonalScaleLocal_MPIBAIJ",
3216                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3217   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3218                                      "MatSetHashTableFactor_MPIBAIJ",
3219                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3220   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
3223 EXTERN_C_END
3224 
3225 /*MC
3226    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3227 
3228    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3229    and MATMPIBAIJ otherwise.
3230 
3231    Options Database Keys:
3232 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3233 
3234   Level: beginner
3235 
3236 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3237 M*/
3238 
3239 EXTERN_C_BEGIN
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatCreate_BAIJ"
3242 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3243 {
3244   PetscErrorCode ierr;
3245   PetscMPIInt    size;
3246 
3247   PetscFunctionBegin;
3248   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3249   if (size == 1) {
3250     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3251   } else {
3252     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3253   }
3254   PetscFunctionReturn(0);
3255 }
3256 EXTERN_C_END
3257 
3258 #undef __FUNCT__
3259 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3260 /*@C
3261    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3262    (block compressed row).  For good matrix assembly performance
3263    the user should preallocate the matrix storage by setting the parameters
3264    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3265    performance can be increased by more than a factor of 50.
3266 
3267    Collective on Mat
3268 
3269    Input Parameters:
3270 +  A - the matrix
3271 .  bs   - size of the blocks
3272 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3273            submatrix  (same for all local rows)
3274 .  d_nnz - array containing the number of block nonzeros in the various block rows
3275            in the diagonal portion of the local submatrix (possibly different for each block
3276            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3277 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3278            submatrix (same for all local rows).
3279 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3280            off-diagonal portion of the local submatrix (possibly different for
3281            each block row) or PETSC_NULL.
3282 
3283    If the *_nnz parameter is given then the *_nz parameter is ignored
3284 
3285    Options Database Keys:
3286 +   -mat_block_size - size of the blocks to use
3287 -   -mat_use_hash_table <fact> - use a hash table to save memory while constructing the matrix; fact controls the table size
3288 
3289    Notes:
3290    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3291    then it must be used on all processors that share the object for that argument.
3292 
3293    Storage Information:
3294    For a square global matrix we define each processor's diagonal portion
3295    to be its local rows and the corresponding columns (a square submatrix);
3296    each processor's off-diagonal portion encompasses the remainder of the
3297    local matrix (a rectangular submatrix).
3298 
3299    The user can specify preallocated storage for the diagonal part of
3300    the local submatrix with either d_nz or d_nnz (not both).  Set
3301    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3302    memory allocation.  Likewise, specify preallocated storage for the
3303    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3304 
3305    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3306    the figure below we depict these three local rows and all columns (0-11).
3307 
3308 .vb
3309            0 1 2 3 4 5 6 7 8 9 10 11
3310           -------------------
3311    row 3  |  o o o d d d o o o o o o
3312    row 4  |  o o o d d d o o o o o o
3313    row 5  |  o o o d d d o o o o o o
3314           -------------------
3315 .ve
3316 
3317    Thus, any entries in the d locations are stored in the d (diagonal)
3318    submatrix, and any entries in the o locations are stored in the
3319    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3320    stored simply in the MATSEQBAIJ format for compressed row storage.
3321 
3322    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3323    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3324    In general, for PDE problems in which most nonzeros are near the diagonal,
3325    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3326    or you will get TERRIBLE performance; see the users' manual chapter on
3327    matrices.
3328 
3329    You can call MatGetInfo() to get information on how effective the preallocation was;
3330    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3331    You can also run with the option -info and look for messages with the string
3332    malloc in them to see if additional memory allocation was needed.
3333 
3334    Level: intermediate
3335 
3336 .keywords: matrix, block, aij, compressed row, sparse, parallel
3337 
3338 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3339 @*/
3340 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3341 {
3342   PetscErrorCode ierr;
3343 
3344   PetscFunctionBegin;
3345   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3346   PetscFunctionReturn(0);
3347 }
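/*
   Illustrative sketch (not part of the PETSc source): preallocating for the
   situation pictured in the manual page above with bs = 3, assuming the owning
   process has one block row (scalar rows 3-5) with one block falling in the
   diagonal portion and up to three blocks in the off-diagonal portion.  "comm"
   and "ierr" are assumed to be declared by the caller.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,12,12);CHKERRQ(ierr);      // 3 local scalar rows/columns of a 12x12 matrix
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,3,1,PETSC_NULL,3,PETSC_NULL);CHKERRQ(ierr);
*/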
3348 
3349 #undef __FUNCT__
3350 #define __FUNCT__ "MatCreateMPIBAIJ"
3351 /*@C
3352    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3353    (block compressed row).  For good matrix assembly performance
3354    the user should preallocate the matrix storage by setting the parameters
3355    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3356    performance can be increased by more than a factor of 50.
3357 
3358    Collective on MPI_Comm
3359 
3360    Input Parameters:
3361 +  comm - MPI communicator
3362 .  bs   - size of the blocks
3363 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3364            This value should be the same as the local size used in creating the
3365            y vector for the matrix-vector product y = Ax.
3366 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3367            This value should be the same as the local size used in creating the
3368            x vector for the matrix-vector product y = Ax.
3369 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3370 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3371 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3372            submatrix  (same for all local rows)
3373 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3374            in the diagonal portion of the local submatrix (possibly different for each block
3375            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3376 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3377            submatrix (same for all local rows).
3378 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3379            off-diagonal portion of the local submatrix (possibly different for
3380            each block row) or PETSC_NULL.
3381 
3382    Output Parameter:
3383 .  A - the matrix
3384 
3385    Options Database Keys:
3386 +   -mat_block_size - size of the blocks to use
3387 -   -mat_use_hash_table <fact> - use a hash table to save memory while constructing the matrix; fact controls the table size
3388 
3389    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3390    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3391    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3392 
3393    Notes:
3394    If the *_nnz parameter is given then the *_nz parameter is ignored
3395 
3396    A nonzero block is any block that has 1 or more nonzeros in it
3397 
3398    The user MUST specify either the local or global matrix dimensions
3399    (possibly both).
3400 
3401    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3402    then it must be used on all processors that share the object for that argument.
3403 
3404    Storage Information:
3405    For a square global matrix we define each processor's diagonal portion
3406    to be its local rows and the corresponding columns (a square submatrix);
3407    each processor's off-diagonal portion encompasses the remainder of the
3408    local matrix (a rectangular submatrix).
3409 
3410    The user can specify preallocated storage for the diagonal part of
3411    the local submatrix with either d_nz or d_nnz (not both).  Set
3412    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3413    memory allocation.  Likewise, specify preallocated storage for the
3414    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3415 
3416    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3417    the figure below we depict these three local rows and all columns (0-11).
3418 
3419 .vb
3420            0 1 2 3 4 5 6 7 8 9 10 11
3421           -------------------
3422    row 3  |  o o o d d d o o o o o o
3423    row 4  |  o o o d d d o o o o o o
3424    row 5  |  o o o d d d o o o o o o
3425           -------------------
3426 .ve
3427 
3428    Thus, any entries in the d locations are stored in the d (diagonal)
3429    submatrix, and any entries in the o locations are stored in the
3430    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3431    stored simply in the MATSEQBAIJ format for compressed row storage.
3432 
3433    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3434    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3435    In general, for PDE problems in which most nonzeros are near the diagonal,
3436    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3437    or you will get TERRIBLE performance; see the users' manual chapter on
3438    matrices.
3439 
3440    Level: intermediate
3441 
3442 .keywords: matrix, block, aij, compressed row, sparse, parallel
3443 
3444 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3445 @*/
3446 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3447 {
3448   PetscErrorCode ierr;
3449   PetscMPIInt    size;
3450 
3451   PetscFunctionBegin;
3452   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3453   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3454   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3455   if (size > 1) {
3456     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3457     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3458   } else {
3459     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3460     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3461   }
3462   PetscFunctionReturn(0);
3463 }
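/*
   Illustrative sketch (not part of the PETSc source): a typical call with
   placeholder sizes, letting PETSc pick the local row/column split.  Roughly 5
   diagonal-portion blocks and 2 off-diagonal-portion blocks are preallocated
   per block row; "comm" and "ierr" are assumed to be declared by the caller.

     Mat A;
     ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/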
3464 
3465 #undef __FUNCT__
3466 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3467 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3468 {
3469   Mat            mat;
3470   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3471   PetscErrorCode ierr;
3472   PetscInt       len=0;
3473 
3474   PetscFunctionBegin;
3475   *newmat       = 0;
3476   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3477   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3478   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3479   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3480 
3481   mat->factortype   = matin->factortype;
3482   mat->preallocated = PETSC_TRUE;
3483   mat->assembled    = PETSC_TRUE;
3484   mat->insertmode   = NOT_SET_VALUES;
3485 
3486   a      = (Mat_MPIBAIJ*)mat->data;
3487   mat->rmap->bs  = matin->rmap->bs;
3488   a->bs2   = oldmat->bs2;
3489   a->mbs   = oldmat->mbs;
3490   a->nbs   = oldmat->nbs;
3491   a->Mbs   = oldmat->Mbs;
3492   a->Nbs   = oldmat->Nbs;
3493 
3494   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3495   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3496 
3497   a->size         = oldmat->size;
3498   a->rank         = oldmat->rank;
3499   a->donotstash   = oldmat->donotstash;
3500   a->roworiented  = oldmat->roworiented;
3501   a->rowindices   = 0;
3502   a->rowvalues    = 0;
3503   a->getrowactive = PETSC_FALSE;
3504   a->barray       = 0;
3505   a->rstartbs     = oldmat->rstartbs;
3506   a->rendbs       = oldmat->rendbs;
3507   a->cstartbs     = oldmat->cstartbs;
3508   a->cendbs       = oldmat->cendbs;
3509 
3510   /* hash table stuff */
3511   a->ht           = 0;
3512   a->hd           = 0;
3513   a->ht_size      = 0;
3514   a->ht_flag      = oldmat->ht_flag;
3515   a->ht_fact      = oldmat->ht_fact;
3516   a->ht_total_ct  = 0;
3517   a->ht_insert_ct = 0;
3518 
3519   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3520   if (oldmat->colmap) {
3521 #if defined (PETSC_USE_CTABLE)
3522   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3523 #else
3524   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3525   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3526   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3527 #endif
3528   } else a->colmap = 0;
3529 
3530   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3531     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3532     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3533     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3534   } else a->garray = 0;
3535 
3536   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3537   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3538   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3539   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3540   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3541 
3542   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3543   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3544   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3545   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3546   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3547   *newmat = mat;
3548 
3549   PetscFunctionReturn(0);
3550 }
3551 
3552 #undef __FUNCT__
3553 #define __FUNCT__ "MatLoad_MPIBAIJ"
3554 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3555 {
3556   PetscErrorCode ierr;
3557   int            fd;
3558   PetscInt       i,nz,j,rstart,rend;
3559   PetscScalar    *vals,*buf;
3560   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3561   MPI_Status     status;
3562   PetscMPIInt    rank,size,maxnz;
3563   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3564   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3565   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3566   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3567   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3568   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3569 
3570   PetscFunctionBegin;
3571   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3572     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3573   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3574 
3575   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3576   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3577   if (!rank) {
3578     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3579     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3580     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3581   }
3582 
3583   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3584 
3585   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3586   M = header[1]; N = header[2];
3587 
3588   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3589   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3590   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3591 
3592   /* If global sizes are set, check if they are consistent with that given in the file */
3593   if (sizesset) {
3594     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3595   }
3596   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3597   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3598 
3599   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3600 
3601   /*
3602      This code adds extra rows to make sure the number of rows is
3603      divisible by the blocksize
3604   */
3605   Mbs        = M/bs;
3606   extra_rows = bs - M + bs*Mbs;
3607   if (extra_rows == bs) extra_rows = 0;
3608   else                  Mbs++;
3609   if (extra_rows && !rank) {
3610     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3611   }
3612 
3613   /* determine ownership of all rows */
3614   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3615     mbs        = Mbs/size + ((Mbs % size) > rank);
3616     m          = mbs*bs;
3617   } else { /* User set */
3618     m          = newmat->rmap->n;
3619     mbs        = m/bs;
3620   }
3621   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3622   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3623 
3624   /* process 0 needs enough room for process with most rows */
3625   if (!rank) {
3626     mmax = rowners[1];
3627     for (i=2; i<size; i++) {
3628       mmax = PetscMax(mmax,rowners[i]);
3629     }
3630     mmax*=bs;
3631   } else mmax = m;
3632 
3633   rowners[0] = 0;
3634   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3635   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3636   rstart = rowners[rank];
3637   rend   = rowners[rank+1];
3638 
3639   /* distribute row lengths to all processors */
3640   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3641   if (!rank) {
3642     mend = m;
3643     if (size == 1) mend = mend - extra_rows;
3644     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3645     for (j=mend; j<m; j++) locrowlens[j] = 1;
3646     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3647     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3648     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3649     for (j=0; j<m; j++) {
3650       procsnz[0] += locrowlens[j];
3651     }
3652     for (i=1; i<size; i++) {
3653       mend = browners[i+1] - browners[i];
3654       if (i == size-1) mend = mend - extra_rows;
3655       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3656       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3657       /* calculate the number of nonzeros on each processor */
3658       for (j=0; j<browners[i+1]-browners[i]; j++) {
3659         procsnz[i] += rowlengths[j];
3660       }
3661       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3662     }
3663     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3664   } else {
3665     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3666   }
3667 
3668   if (!rank) {
3669     /* determine max buffer needed and allocate it */
3670     maxnz = procsnz[0];
3671     for (i=1; i<size; i++) {
3672       maxnz = PetscMax(maxnz,procsnz[i]);
3673     }
3674     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3675 
3676     /* read in my part of the matrix column indices  */
3677     nz     = procsnz[0];
3678     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3679     mycols = ibuf;
3680     if (size == 1)  nz -= extra_rows;
3681     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3682     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3683 
3684     /* read in everyone else's column indices (except the last process's) and ship them off */
3685     for (i=1; i<size-1; i++) {
3686       nz   = procsnz[i];
3687       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3688       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3689     }
3690     /* read in the stuff for the last proc */
3691     if (size != 1) {
3692       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3693       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3694       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3695       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3696     }
3697     ierr = PetscFree(cols);CHKERRQ(ierr);
3698   } else {
3699     /* determine buffer space needed for message */
3700     nz = 0;
3701     for (i=0; i<m; i++) {
3702       nz += locrowlens[i];
3703     }
3704     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3705     mycols = ibuf;
3706     /* receive message of column indices*/
3707     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3708     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3709     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3710   }
3711 
3712   /* loop over local rows, determining number of off diagonal entries */
3713   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3714   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3715   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3716   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3717   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3718   rowcount = 0; nzcount = 0;
3719   for (i=0; i<mbs; i++) {
3720     dcount  = 0;
3721     odcount = 0;
3722     for (j=0; j<bs; j++) {
3723       kmax = locrowlens[rowcount];
3724       for (k=0; k<kmax; k++) {
3725         tmp = mycols[nzcount++]/bs;
3726         if (!mask[tmp]) {
3727           mask[tmp] = 1;
3728           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3729           else masked1[dcount++] = tmp;
3730         }
3731       }
3732       rowcount++;
3733     }
3734 
3735     dlens[i]  = dcount;
3736     odlens[i] = odcount;
3737 
3738     /* zero out the mask elements we set */
3739     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3740     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3741   }
3742 
3743 
3744   if (!sizesset) {
3745     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3746   }
3747   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3748 
3749   if (!rank) {
3750     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3751     /* read in my part of the matrix numerical values  */
3752     nz = procsnz[0];
3753     vals = buf;
3754     mycols = ibuf;
3755     if (size == 1)  nz -= extra_rows;
3756     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3757     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3758 
3759     /* insert into matrix */
3760     jj      = rstart*bs;
3761     for (i=0; i<m; i++) {
3762       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3763       mycols += locrowlens[i];
3764       vals   += locrowlens[i];
3765       jj++;
3766     }
3767     /* read in other processors (except the last one) and ship out */
3768     for (i=1; i<size-1; i++) {
3769       nz   = procsnz[i];
3770       vals = buf;
3771       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3772       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3773     }
3774     /* the last proc */
3775     if (size != 1){
3776       nz   = procsnz[i] - extra_rows;
3777       vals = buf;
3778       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3779       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3780       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3781     }
3782     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3783   } else {
3784     /* receive numeric values */
3785     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3786 
3787     /* receive message of values*/
3788     vals   = buf;
3789     mycols = ibuf;
3790     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3791     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3792     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3793 
3794     /* insert into matrix */
3795     jj      = rstart*bs;
3796     for (i=0; i<m; i++) {
3797       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3798       mycols += locrowlens[i];
3799       vals   += locrowlens[i];
3800       jj++;
3801     }
3802   }
3803   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3804   ierr = PetscFree(buf);CHKERRQ(ierr);
3805   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3806   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3807   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3808   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3809   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811 
3812   PetscFunctionReturn(0);
3813 }
3814 
3815 #undef __FUNCT__
3816 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3817 /*@
3818    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3819 
3820    Input Parameters:
3821 +  mat  - the matrix
3822 -  fact - factor used to compute the size of the hash table
3823 
3824    Not Collective, each process can use a different factor
3825 
3826    Level: advanced
3827 
3828   Notes:
3829    This can also be set by the command line option: -mat_use_hash_table <fact>
3830 
3831 .keywords: matrix, hashtable, factor, HT
3832 
3833 .seealso: MatSetOption()
3834 @*/
3835 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3836 {
3837   PetscErrorCode ierr;
3838 
3839   PetscFunctionBegin;
3840   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
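/*
   Illustrative sketch (not part of the PETSc source): enabling the hash table
   during assembly and choosing the factor explicitly; 1.6 is an arbitrary
   example value (the options handling earlier in this file falls back to 1.39
   for values that are not greater than 1.0).

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/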
3843 
3844 EXTERN_C_BEGIN
3845 #undef __FUNCT__
3846 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3847 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3848 {
3849   Mat_MPIBAIJ *baij;
3850 
3851   PetscFunctionBegin;
3852   baij = (Mat_MPIBAIJ*)mat->data;
3853   baij->ht_fact = fact;
3854   PetscFunctionReturn(0);
3855 }
3856 EXTERN_C_END
3857 
3858 #undef __FUNCT__
3859 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3860 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3861 {
3862   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3863   PetscFunctionBegin;
3864   *Ad     = a->A;
3865   *Ao     = a->B;
3866   *colmap = a->garray;
3867   PetscFunctionReturn(0);
3868 }
3869 
3870 /*
3871     Special version for direct calls from Fortran (to eliminate two function call overheads)
3872 */
3873 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3874 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3875 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3876 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3877 #endif
3878 
3879 #undef __FUNCT__
3880 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3881 /*@C
3882   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3883 
3884   Collective on Mat
3885 
3886   Input Parameters:
3887 + mat - the matrix
3888 . min - number of input rows
3889 . im - input rows
3890 . nin - number of input columns
3891 . in - input columns
3892 . v - numerical values input
3893 - addvin - INSERT_VALUES or ADD_VALUES
3894 
3895   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate duplication of code.
3896 
3897   Level: advanced
3898 
3899 .seealso:   MatSetValuesBlocked()
3900 @*/
3901 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3902 {
3903   /* convert input arguments to C version */
3904   Mat             mat = *matin;
3905   PetscInt        m = *min, n = *nin;
3906   InsertMode      addv = *addvin;
3907 
3908   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3909   const MatScalar *value;
3910   MatScalar       *barray=baij->barray;
3911   PetscBool       roworiented = baij->roworiented;
3912   PetscErrorCode  ierr;
3913   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3914   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3915   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3916 
3917   PetscFunctionBegin;
3918   /* tasks normally handled by MatSetValuesBlocked() */
3919   if (mat->insertmode == NOT_SET_VALUES) {
3920     mat->insertmode = addv;
3921   }
3922 #if defined(PETSC_USE_DEBUG)
3923   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3924   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3925 #endif
3926   if (mat->assembled) {
3927     mat->was_assembled = PETSC_TRUE;
3928     mat->assembled     = PETSC_FALSE;
3929   }
3930   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3931 
3932 
3933   if(!barray) {
3934     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3935     baij->barray = barray;
3936   }
3937 
3938   if (roworiented) {
3939     stepval = (n-1)*bs;
3940   } else {
3941     stepval = (m-1)*bs;
3942   }
3943   for (i=0; i<m; i++) {
3944     if (im[i] < 0) continue;
3945 #if defined(PETSC_USE_DEBUG)
3946     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3947 #endif
3948     if (im[i] >= rstart && im[i] < rend) {
3949       row = im[i] - rstart;
3950       for (j=0; j<n; j++) {
3951         /* If NumCol = 1 then a copy is not required */
3952         if ((roworiented) && (n == 1)) {
3953           barray = (MatScalar*)v + i*bs2;
3954         } else if((!roworiented) && (m == 1)) {
3955           barray = (MatScalar*)v + j*bs2;
3956         } else { /* Here a copy is required */
3957           if (roworiented) {
3958             value = v + i*(stepval+bs)*bs + j*bs;
3959           } else {
3960             value = v + j*(stepval+bs)*bs + i*bs;
3961           }
3962           for (ii=0; ii<bs; ii++,value+=stepval) {
3963             for (jj=0; jj<bs; jj++) {
3964               *barray++  = *value++;
3965             }
3966           }
3967           barray -=bs2;
3968         }
3969 
3970         if (in[j] >= cstart && in[j] < cend){
3971           col  = in[j] - cstart;
3972           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3973         }
3974         else if (in[j] < 0) continue;
3975 #if defined(PETSC_USE_DEBUG)
3976         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3977 #endif
3978         else {
3979           if (mat->was_assembled) {
3980             if (!baij->colmap) {
3981               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3982             }
3983 
3984 #if defined(PETSC_USE_DEBUG)
3985 #if defined (PETSC_USE_CTABLE)
3986             { PetscInt data;
3987               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3988               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3989             }
3990 #else
3991             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3992 #endif
3993 #endif
3994 #if defined (PETSC_USE_CTABLE)
3995 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3996             col  = (col - 1)/bs;
3997 #else
3998             col = (baij->colmap[in[j]] - 1)/bs;
3999 #endif
4000             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4001               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4002               col =  in[j];
4003             }
4004           }
4005           else col = in[j];
4006           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4007         }
4008       }
4009     } else {
4010       if (!baij->donotstash) {
4011         if (roworiented) {
4012           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4013         } else {
4014           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4015         }
4016       }
4017     }
4018   }
4019 
4020   /* task normally handled by MatSetValuesBlocked() */
4021   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4022   PetscFunctionReturn(0);
4023 }
4024 
4025 #undef __FUNCT__
4026 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4027 /*@
4028      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4029          in standard CSR format.
4030 
4031    Collective on MPI_Comm
4032 
4033    Input Parameters:
4034 +  comm - MPI communicator
4035 .  bs - the block size, only a block size of 1 is supported
4036 .  m - number of local rows (Cannot be PETSC_DECIDE)
4037 .  n - This value should be the same as the local size used in creating the
4038        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4039        calculated if N is given) For square matrices n is almost always m.
4040 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4041 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4042 .   i - row indices
4043 .   j - column indices
4044 -   a - matrix values
4045 
4046    Output Parameter:
4047 .   mat - the matrix
4048 
4049    Level: intermediate
4050 
4051    Notes:
4052        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4053      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4054      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4055 
4056        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4057 
4058 .keywords: matrix, aij, compressed row, sparse, parallel
4059 
4060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4061           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4062 @*/
4063 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4064 {
4065   PetscErrorCode ierr;
4066 
4067 
4068   PetscFunctionBegin;
4069   if (i[0]) {
4070     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4071   }
4072   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4073   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4074   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4075   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4076   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
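/*
   Illustrative sketch (not part of the PETSc source): per-process CSR input for
   bs = 1 (the only supported block size, per the note above).  Two local rows
   are described; column indices are global and sorted within each row, and the
   numerical values are placeholders.  "comm" and "ierr" are assumed given.

     PetscInt    i[] = {0,2,4};
     PetscInt    j[] = {0,1,1,2};
     PetscScalar a[] = {1.0,-1.0,2.0,-2.0};
     Mat         A;
     ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/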
4079