#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

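/*
   Editorial note on the routine below: for each local row it computes the entry
   of largest absolute value by searching the diagonal block A and the
   off-diagonal block B separately and merging the two results; the off-diagonal
   winner's column index is mapped back to a global index through garray (block
   index) plus the offset within the block.
*/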
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
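/*
   A note on the stored convention (editorial, inferred from the code below):
   colmap maps a global block column to 1 + bs*(local block column in B), so a
   stored value of 0 (or a failed table lookup) means "this block column has no
   off-diagonal entries here".  For example, with bs = 2 and garray = {3,7},
   colmap[3] = 1 and colmap[7] = 3; MatSetValues_MPIBAIJ() later recovers the
   local point column as colmap[global/bs] - 1 + global%bs.
*/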
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

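/*
   The two macros below are hand-inlined variants of MatSetValues_SeqBAIJ() for
   the diagonal (A) and off-diagonal (B) blocks.  Each locates block column bcol
   in block row brow with a binary search that stops once the window is at most
   three entries wide, finishes with a linear scan, and then either updates the
   existing bs x bs block in place or shifts the tail of the row up to make room
   for a new block (reallocating first if the row is full).
*/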
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
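/*
   Editorial note: this routine inserts dense bs x bs blocks.  When several
   blocks are passed at once, each block is staged contiguously in the scratch
   buffer baij->barray before being handed to the SeqBAIJ kernels; when a single
   block row (row-oriented) or single block column (column-oriented) is passed,
   the user's array is already contiguous and is used in place without copying.
*/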
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous and a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

375 
376 #define HASH_KEY 0.6180339887
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
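/*
   HASH() is multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part
   of the golden ratio, so tmp = key*HASH_KEY, and the fractional part of tmp,
   scaled by the table size, spreads consecutive keys roughly uniformly over
   [0,size).  A small worked example (editorial, not from the source): with
   size = 11 and key = 5, tmp = 3.0901699435, its fractional part is 0.0901699,
   and the slot is (PetscInt)(11*0.0901699) = 0.
*/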
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
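/*
   Editorial note: NORM_FROBENIUS is sqrt(sum_ij |a_ij|^2), accumulated over the
   local A and B blocks and summed across processes; NORM_1 is the maximum
   column sum max_j sum_i |a_ij|, assembled into a global array of column sums;
   NORM_INFINITY is the maximum row sum max_i sum_j |a_ij|, reduced with MPI_MAX.
*/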
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
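/*
   The table uses open addressing with linear probing: a key is hashed to h1 and
   the first empty slot at (h1+k)%ht_size, k = 0,1,2,..., receives the key along
   with a pointer (in HD) to the corresponding bs2-sized block of stored values.
   MatSetValues_MPIBAIJ_HT() probes the same sequence, wrapping from the end of
   the table back to the start, and errors if the key is absent.
*/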
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE, and restore the
       original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to participate in the call since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type is printed by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
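/*
   Editorial note on the binary layout written below: a four-entry header
   (MAT_FILE_CLASSID, global rows M, global columns N, total nonzero count),
   then one length per point row, then the column indices of every nonzero,
   then the numerical values; process 0 gathers each piece from the other
   processes under viewer flow control and writes it to the file.
*/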
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for every point row, not just once per block row,
     since process 0 cannot reconstruct the per-row information from block rows alone. This requires
     more communication, but still no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
1310 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1311 {
1312   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314   PetscInt       nt;
1315 
1316   PetscFunctionBegin;
1317   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1318   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1319   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1320   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1321   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1323   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
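/*
   Illustrative sketch of driving the multiply above. MatGetVecs() returns
   vectors whose local sizes match A->cmap->n and A->rmap->n, so the size
   checks in MatMult_MPIBAIJ() pass; the helper name is a placeholder.
*/
static PetscErrorCode ExampleMultBAIJ(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);  /* x conforms to columns, y to rows */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);       /* overlaps the lvec scatter with the diagonal-block multiply */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
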
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1330 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1331 {
1332   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1345 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349   PetscBool      merged;
1350 
1351   PetscFunctionBegin;
1352   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1353   /* do nondiagonal part */
1354   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1355   if (!merged) {
1356     /* send it on its way */
1357     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1358     /* do local part */
1359     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1360     /* receive remote parts: note this assumes the values are not actually */
1361     /* inserted in yy until the next line */
1362     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1363   } else {
1364     /* do local part */
1365     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1366     /* send it on its way */
1367     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1368     /* values were actually received in the Begin() but we still need to call this no-op */
1369     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1376 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1377 {
1378   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1379   PetscErrorCode ierr;
1380 
1381   PetscFunctionBegin;
1382   /* do nondiagonal part */
1383   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1384   /* send it on its way */
1385   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1386   /* do local part */
1387   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1388   /* receive remote parts: note this assumes the values are not actually */
1389   /* inserted in zz until the VecScatterEnd() below, which is true for this */
1390   /* implementation but perhaps is not always true. */
1391   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 /*
1396   This only works correctly for square matrices where the subblock A->A is the
1397    diagonal block
1398 */
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1401 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1402 {
1403   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1404   PetscErrorCode ierr;
1405 
1406   PetscFunctionBegin;
1407   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1408   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatScale_MPIBAIJ"
1414 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1421   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1427 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1428 {
1429   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1430   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1431   PetscErrorCode ierr;
1432   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1433   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1434   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1435 
1436   PetscFunctionBegin;
1437   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1438   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1439   mat->getrowactive = PETSC_TRUE;
1440 
1441   if (!mat->rowvalues && (idx || v)) {
1442     /*
1443         allocate enough space to hold information from the longest row.
1444     */
1445     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1446     PetscInt    max = 1,mbs = mat->mbs,tmp;
1447     for (i=0; i<mbs; i++) {
1448       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1449       if (max < tmp) max = tmp;
1450     }
1451     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1452   }
1453   lrow = row - brstart;
1454 
1455   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1456   if (!v)   {pvA = 0; pvB = 0;}
1457   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1458   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1459   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1460   nztot = nzA + nzB;
1461 
1462   cmap = mat->garray;
1463   if (v  || idx) {
1464     if (nztot) {
1465       /* Sort by increasing column numbers, assuming A and B already sorted */
1466       PetscInt imark = -1;
1467       if (v) {
1468         *v = v_p = mat->rowvalues;
1469         for (i=0; i<nzB; i++) {
1470           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1471           else break;
1472         }
1473         imark = i;
1474         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1475         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1476       }
1477       if (idx) {
1478         *idx = idx_p = mat->rowindices;
1479         if (imark > -1) {
1480           for (i=0; i<imark; i++) {
1481             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482           }
1483         } else {
1484           for (i=0; i<nzB; i++) {
1485             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1486             else break;
1487           }
1488           imark = i;
1489         }
1490         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1491     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1492       }
1493     } else {
1494       if (idx) *idx = 0;
1495       if (v)   *v   = 0;
1496     }
1497   }
1498   *nz  = nztot;
1499   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1500   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
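/*
   Illustrative sketch: scanning the locally owned rows through the public
   MatGetRow()/MatRestoreRow() interface, which lands in the two routines
   above. Only rows in [rstart,rend) may be requested and only one row may
   be active at a time; the helper name is a placeholder.
*/
static PetscErrorCode ExampleRowScanBAIJ(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... examine nz, cols[], vals[] for this global row ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
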
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1506 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1507 {
1508   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1509 
1510   PetscFunctionBegin;
1511   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1512   baij->getrowactive = PETSC_FALSE;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1518 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1519 {
1520   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1525   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1531 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1532 {
1533   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1534   Mat            A  = a->A,B = a->B;
1535   PetscErrorCode ierr;
1536   PetscReal      isend[5],irecv[5];
1537 
1538   PetscFunctionBegin;
1539   info->block_size = (PetscReal)matin->rmap->bs;
1540 
1541   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1542 
1543   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1544   isend[3] = info->memory;  isend[4] = info->mallocs;
1545 
1546   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1547 
1548   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1549   isend[3] += info->memory;  isend[4] += info->mallocs;
1550 
1551   if (flag == MAT_LOCAL) {
1552     info->nz_used      = isend[0];
1553     info->nz_allocated = isend[1];
1554     info->nz_unneeded  = isend[2];
1555     info->memory       = isend[3];
1556     info->mallocs      = isend[4];
1557   } else if (flag == MAT_GLOBAL_MAX) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1559 
1560     info->nz_used      = irecv[0];
1561     info->nz_allocated = irecv[1];
1562     info->nz_unneeded  = irecv[2];
1563     info->memory       = irecv[3];
1564     info->mallocs      = irecv[4];
1565   } else if (flag == MAT_GLOBAL_SUM) {
1566     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1567 
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1574   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1575   info->fill_ratio_needed = 0;
1576   info->factor_mallocs    = 0;
1577   PetscFunctionReturn(0);
1578 }
1579 
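/*
   Illustrative sketch: summing the per-process counts via the
   MAT_GLOBAL_SUM reduction implemented above; the helper name is a
   placeholder.
*/
static PetscErrorCode ExampleInfoBAIJ(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz_used %g nz_allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
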
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1582 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1583 {
1584   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1585   PetscErrorCode ierr;
1586 
1587   PetscFunctionBegin;
1588   switch (op) {
1589   case MAT_NEW_NONZERO_LOCATIONS:
1590   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1591   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1592   case MAT_KEEP_NONZERO_PATTERN:
1593   case MAT_NEW_NONZERO_LOCATION_ERR:
1594     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1595     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1596     break;
1597   case MAT_ROW_ORIENTED:
1598     a->roworiented = flg;
1599 
1600     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1601     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1602     break;
1603   case MAT_NEW_DIAGONALS:
1604     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1605     break;
1606   case MAT_IGNORE_OFF_PROC_ENTRIES:
1607     a->donotstash = flg;
1608     break;
1609   case MAT_USE_HASH_TABLE:
1610     a->ht_flag = flg;
1611     break;
1612   case MAT_SYMMETRIC:
1613   case MAT_STRUCTURALLY_SYMMETRIC:
1614   case MAT_HERMITIAN:
1615   case MAT_SYMMETRY_ETERNAL:
1616     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1617     break;
1618   default:
1619     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
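/*
   Illustrative sketch: the options handled above are set per matrix before
   assembly. MAT_USE_HASH_TABLE enables the BAIJ hash-table insertion path;
   MAT_IGNORE_OFF_PROC_ENTRIES must be set before any off-process values
   are generated. The helper name is a placeholder.
*/
static PetscErrorCode ExampleOptionsBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
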
1624 #undef __FUNCT__
1625 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1626 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1627 {
1628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1629   Mat_SeqBAIJ    *Aloc;
1630   Mat            B;
1631   PetscErrorCode ierr;
1632   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1633   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1634   MatScalar      *a;
1635 
1636   PetscFunctionBegin;
1637   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1638   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1639     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1640     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1641     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1642     /* Do not know preallocation information, but must set block size */
1643     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1644   } else {
1645     B = *matout;
1646   }
1647 
1648   /* copy over the A part */
1649   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1650   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1651   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1652 
1653   for (i=0; i<mbs; i++) {
1654     rvals[0] = bs*(baij->rstartbs + i);
1655     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1656     for (j=ai[i]; j<ai[i+1]; j++) {
1657       col = (baij->cstartbs+aj[j])*bs;
1658       for (k=0; k<bs; k++) {
1659         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1660 
1661         col++; a += bs;
1662       }
1663     }
1664   }
1665   /* copy over the B part */
1666   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1667   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1668   for (i=0; i<mbs; i++) {
1669     rvals[0] = bs*(baij->rstartbs + i);
1670     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1671     for (j=ai[i]; j<ai[i+1]; j++) {
1672       col = baij->garray[aj[j]]*bs;
1673       for (k=0; k<bs; k++) {
1674         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1675         col++;
1676         a += bs;
1677       }
1678     }
1679   }
1680   ierr = PetscFree(rvals);CHKERRQ(ierr);
1681   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683 
1684   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1685   else {
1686     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1687   }
1688   PetscFunctionReturn(0);
1689 }
1690 
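/*
   Illustrative sketch: out-of-place and in-place transposes through the
   routine above; as checked there, in-place reuse is limited to square
   matrices. The helper name is a placeholder.
*/
static PetscErrorCode ExampleTransposeBAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix; preallocation is guessed */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place; square matrices only */
  PetscFunctionReturn(0);
}
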
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1693 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1694 {
1695   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1696   Mat            a     = baij->A,b = baij->B;
1697   PetscErrorCode ierr;
1698   PetscInt       s1,s2,s3;
1699 
1700   PetscFunctionBegin;
1701   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1702   if (rr) {
1703     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1704     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1705     /* Overlap communication with computation. */
1706     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1707   }
1708   if (ll) {
1709     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1710     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1711     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1712   }
1713   /* scale the diagonal block */
1714   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1715 
1716   if (rr) {
1717     /* Do a scatter end and then right scale the off-diagonal block */
1718     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1719     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
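/*
   Illustrative sketch: calling the scaling above with conforming vectors;
   MatGetVecs() produces ll and rr whose local sizes match the row and
   column layouts, as the size checks require. The helper name is a
   placeholder.
*/
static PetscErrorCode ExampleDiagScaleBAIJ(Mat A)
{
  PetscErrorCode ierr;
  Vec            ll,rr;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&rr,&ll);CHKERRQ(ierr);     /* rr: column layout, ll: row layout */
  ierr = VecSet(ll,2.0);CHKERRQ(ierr);
  ierr = VecSet(rr,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr); /* A <- diag(ll)*A*diag(rr) */
  ierr = VecDestroy(&ll);CHKERRQ(ierr);
  ierr = VecDestroy(&rr);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
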
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1726 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1727 {
1728   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1729   PetscErrorCode    ierr;
1730   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1731   PetscInt          i,*owners = A->rmap->range;
1732   PetscInt          *nprocs,j,idx,nsends,row;
1733   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1734   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1735   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1736   MPI_Comm          comm;
1737   MPI_Request       *send_waits,*recv_waits;
1738   MPI_Status        recv_status,*send_status;
1739   const PetscScalar *xx;
1740   PetscScalar       *bb;
1741 #if defined(PETSC_DEBUG)
1742   PetscBool         found = PETSC_FALSE;
1743 #endif
1744 
1745   PetscFunctionBegin;
1746   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1747   /*  first count number of contributors to each processor */
1748   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1749   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1750   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* N+1 avoids a zero-length malloc when N == 0 */
1751   j    = 0;
1752   for (i=0; i<N; i++) {
1753     if (lastidx > (idx = rows[i])) j = 0;
1754     lastidx = idx;
1755     for (; j<size; j++) {
1756       if (idx >= owners[j] && idx < owners[j+1]) {
1757         nprocs[2*j]++;
1758         nprocs[2*j+1] = 1;
1759         owner[i]      = j;
1760 #if defined(PETSC_DEBUG)
1761         found = PETSC_TRUE;
1762 #endif
1763         break;
1764       }
1765     }
1766 #if defined(PETSC_DEBUG)
1767     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1768     found = PETSC_FALSE;
1769 #endif
1770   }
1771   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1772 
1773   if (A->nooffproczerorows) {
1774     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1775     nrecvs = nsends;
1776     nmax   = N;
1777   } else {
1778     /* inform other processors of number of messages and max length*/
1779     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1780   }
1781 
1782   /* post receives:   */
1783   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1784   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1785   for (i=0; i<nrecvs; i++) {
1786     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1787   }
1788 
1789   /* do sends:
1790      1) starts[i] gives the starting index in svalues for stuff going to
1791      the ith processor
1792   */
1793   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1794   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1795   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1796   starts[0] = 0;
1797   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1798   for (i=0; i<N; i++) {
1799     svalues[starts[owner[i]]++] = rows[i];
1800   }
1801 
1802   starts[0] = 0;
1803   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1804   count = 0;
1805   for (i=0; i<size; i++) {
1806     if (nprocs[2*i+1]) {
1807       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1808     }
1809   }
1810   ierr = PetscFree(starts);CHKERRQ(ierr);
1811 
1812   base = owners[rank];
1813 
1814   /*  wait on receives */
1815   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1816   count = nrecvs;
1817   slen  = 0;
1818   while (count) {
1819     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1820     /* unpack receives into our local space */
1821     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1822 
1823     source[imdex] = recv_status.MPI_SOURCE;
1824     lens[imdex]   = n;
1825     slen         += n;
1826     count--;
1827   }
1828   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1829 
1830   /* move the data into the send scatter */
1831   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1832   count = 0;
1833   for (i=0; i<nrecvs; i++) {
1834     values = rvalues + i*nmax;
1835     for (j=0; j<lens[i]; j++) {
1836       lrows[count++] = values[j] - base;
1837     }
1838   }
1839   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1840   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1841   ierr = PetscFree(owner);CHKERRQ(ierr);
1842   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1843 
1844   /* fix right hand side if needed */
1845   if (x && b) {
1846     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1847     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1848     for (i=0; i<slen; i++) {
1849       bb[lrows[i]] = diag*xx[lrows[i]];
1850     }
1851     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1852     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1853   }
1854 
1855   /* actually zap the local rows */
1856   /*
1857      Zero the required rows. If the "diagonal block" of the matrix
1858      is square and the user wishes to set the diagonal, we use separate
1859      code so that MatSetValues() is not called for each diagonal entry,
1860      which would allocate new memory, triggering many mallocs and
1861      slowing things down.
1862   */
1863   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1864   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1865   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1866     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1867   } else if (diag != 0.0) {
1868     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1869     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1870        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1871     for (i=0; i<slen; i++) {
1872       row  = lrows[i] + rstart_bs;
1873       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1874     }
1875     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877   } else {
1878     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1879   }
1880 
1881   ierr = PetscFree(lrows);CHKERRQ(ierr);
1882 
1883   /* wait on sends */
1884   if (nsends) {
1885     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1886     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1887     ierr = PetscFree(send_status);CHKERRQ(ierr);
1888   }
1889   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1890   ierr = PetscFree(svalues);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
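/*
   Illustrative sketch: imposing a Dirichlet-style row through the routine
   above. When x and b are supplied, b[row] is set to diag*x[row] so the
   zeroed system keeps the prescribed solution value; rows owned by other
   processes are communicated as implemented above. The helper name and
   the choice of row are placeholders.
*/
static PetscErrorCode ExampleZeroRowsBAIJ(Mat A,Vec x,Vec b)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  row  = rstart;                                  /* first locally owned row */
  ierr = MatZeroRows(A,1,&row,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
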
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1896 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1897 {
1898   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatEqual_MPIBAIJ"
1910 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1911 {
1912   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1913   Mat            a,b,c,d;
1914   PetscBool      flg;
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   a = matA->A; b = matA->B;
1919   c = matB->A; d = matB->B;
1920 
1921   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1922   if (flg) {
1923     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1924   }
1925   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatCopy_MPIBAIJ"
1931 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1932 {
1933   PetscErrorCode ierr;
1934   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1935   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1936 
1937   PetscFunctionBegin;
1938   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1939   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1940     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1941   } else {
1942     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1943     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1950 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1951 {
1952   PetscErrorCode ierr;
1953 
1954   PetscFunctionBegin;
1955   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1961 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1962 {
1963   PetscErrorCode ierr;
1964   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1965   PetscBLASInt   bnz,one=1;
1966   Mat_SeqBAIJ    *x,*y;
1967 
1968   PetscFunctionBegin;
1969   if (str == SAME_NONZERO_PATTERN) {
1970     PetscScalar alpha = a;
1971     x    = (Mat_SeqBAIJ*)xx->A->data;
1972     y    = (Mat_SeqBAIJ*)yy->A->data;
1973     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1974     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1975     x    = (Mat_SeqBAIJ*)xx->B->data;
1976     y    = (Mat_SeqBAIJ*)yy->B->data;
1977     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1978     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1979   } else {
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
1984 
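/*
   Illustrative sketch: with SAME_NONZERO_PATTERN the routine above reduces
   to two BLAS axpy calls on the stored values of the diagonal and
   off-diagonal blocks; any other pattern falls back to MatAXPY_Basic().
   The helper name is a placeholder.
*/
static PetscErrorCode ExampleAXPYBAIJ(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Y <- Y + 2*X */
  PetscFunctionReturn(0);
}
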
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1987 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1988 {
1989   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1994   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2000 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2001 {
2002   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2007   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2013 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2014 {
2015   PetscErrorCode ierr;
2016   IS             iscol_local;
2017   PetscInt       csize;
2018 
2019   PetscFunctionBegin;
2020   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2021   if (call == MAT_REUSE_MATRIX) {
2022     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2023     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2024   } else {
2025     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2026   }
2027   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2028   if (call == MAT_INITIAL_MATRIX) {
2029     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2030     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
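
/*
   Illustrative sketch: extracting a parallel submatrix through the routine
   above. Each process supplies the rows it should own in the result; for
   BAIJ the index sets must be block aligned, and a stride over the locally
   owned rows is one block-aligned choice. Assumes a square matrix with
   matching row/column layouts; the helper name is a placeholder.
*/
static PetscErrorCode ExampleSubMatrixBAIJ(Mat A,Mat *S)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,S);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
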
2034 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2037 /*
2038   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2039   matrix locally, and then the end result by concatenating the local matrices.
2040   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2041 */
2042 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2043 {
2044   PetscErrorCode ierr;
2045   PetscMPIInt    rank,size;
2046   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2047   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2048   Mat            M,Mreuse;
2049   MatScalar      *vwork,*aa;
2050   MPI_Comm       comm;
2051   IS             isrow_new,iscol_new;
2052   PetscBool      idflag,allrows,allcols;
2053   Mat_SeqBAIJ    *aij;
2054 
2055   PetscFunctionBegin;
2056   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2057   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2058   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2059   /* The compression and expansion should be avoided. It does not catch
2060      errors and may change the indices, hence is bug-prone */
2061   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2062   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2063 
2064   /* Check for special case: each processor gets entire matrix columns */
2065   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2066   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2067   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2068   else allcols = PETSC_FALSE;
2069 
2070   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2071   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2072   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2073   else allrows = PETSC_FALSE;
2074 
2075   if (call ==  MAT_REUSE_MATRIX) {
2076     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2077     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2078     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2079   } else {
2080     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2081   }
2082   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2083   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2084   /*
2085       m - number of local rows
2086       n - number of columns (same on all processors)
2087       rstart - first row in new global matrix generated
2088   */
2089   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2090   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2091   m    = m/bs;
2092   n    = n/bs;
2093 
2094   if (call == MAT_INITIAL_MATRIX) {
2095     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2096     ii  = aij->i;
2097     jj  = aij->j;
2098 
2099     /*
2100         Determine the number of non-zeros in the diagonal and off-diagonal
2101         portions of the matrix in order to do correct preallocation
2102     */
2103 
2104     /* first get start and end of "diagonal" columns */
2105     if (csize == PETSC_DECIDE) {
2106       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2107       if (mglobal == n*bs) { /* square matrix */
2108         nlocal = m;
2109       } else {
2110         nlocal = n/size + ((n % size) > rank);
2111       }
2112     } else {
2113       nlocal = csize/bs;
2114     }
2115     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2116     rstart = rend - nlocal;
2117     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2118 
2119     /* next, compute all the lengths */
2120     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2121     olens = dlens + m;
2122     for (i=0; i<m; i++) {
2123       jend = ii[i+1] - ii[i];
2124       olen = 0;
2125       dlen = 0;
2126       for (j=0; j<jend; j++) {
2127         if (*jj < rstart || *jj >= rend) olen++;
2128         else dlen++;
2129         jj++;
2130       }
2131       olens[i] = olen;
2132       dlens[i] = dlen;
2133     }
2134     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2135     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2136     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2137     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2138     ierr = PetscFree(dlens);CHKERRQ(ierr);
2139   } else {
2140     PetscInt ml,nl;
2141 
2142     M    = *newmat;
2143     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2144     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2145     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2146     /*
2147          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2148        rather than the slower MatSetValuesBlocked().
2149     */
2150     M->was_assembled = PETSC_TRUE;
2151     M->assembled     = PETSC_FALSE;
2152   }
2153   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2154   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2155   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2156   ii   = aij->i;
2157   jj   = aij->j;
2158   aa   = aij->a;
2159   for (i=0; i<m; i++) {
2160     row   = rstart/bs + i;
2161     nz    = ii[i+1] - ii[i];
2162     cwork = jj;     jj += nz;
2163     vwork = aa;     aa += nz*bs*bs;
2164     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2165   }
2166 
2167   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   *newmat = M;
2170 
2171   /* save submatrix used in processor for next request */
2172   if (call ==  MAT_INITIAL_MATRIX) {
2173     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2174     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatPermute_MPIBAIJ"
2181 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2182 {
2183   MPI_Comm       comm,pcomm;
2184   PetscInt       first,rlocal_size,clocal_size,nrows;
2185   const PetscInt *rows;
2186   PetscMPIInt    size;
2187   IS             crowp,growp,irowp,lrowp,lcolp;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2192   /* make a collective version of 'rowp' */
2193   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2194   if (pcomm==comm) {
2195     crowp = rowp;
2196   } else {
2197     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2198     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2199     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2200     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2201   }
2202   /* collect the global row permutation and invert it */
2203   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2204   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2205   if (pcomm!=comm) {
2206     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2207   }
2208   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2209   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2210   /* get the local target indices */
2211   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2212   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2213   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2214   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2215   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2216   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2217   /* the column permutation is much simpler: gather a complete
2218      copy of 'colp' that each process can use locally */
2219   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2220   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2221   if (size==1) {
2222     lcolp = colp;
2223   } else {
2224     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2225   }
2226   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2227   /* now we just get the submatrix */
2228   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2229   if (size>1) {
2230     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2231   }
2232   /* clean up */
2233   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2239 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240 {
2241   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2242   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2243 
2244   PetscFunctionBegin;
2245   if (nghosts) *nghosts = B->nbs;
2246   if (ghosts) *ghosts = baij->garray;
2247   PetscFunctionReturn(0);
2248 }
2249 
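/*
   Illustrative sketch: the ghosts returned above are the global block
   column indices (garray) of the off-diagonal part, one way to build
   ghosted vectors that conform to the matrix communication pattern. The
   helper name is a placeholder.
*/
static PetscErrorCode ExampleGhostsBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       nghosts;
  const PetscInt *ghosts;

  PetscFunctionBegin;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"%D ghost block columns\n",nghosts);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
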
2250 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2254 /*
2255     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2256 */
2257 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2258 {
2259   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2260   PetscErrorCode         ierr;
2261   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2262   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2263   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2264   PetscInt               nis = iscoloring->n,nctot,*cols;
2265   PetscInt               *rowhit,M,cstart,cend,colb;
2266   PetscInt               *columnsforrow,l;
2267   IS                     *isa;
2268   PetscBool              done,flg;
2269   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2270   PetscInt               ctype=c->ctype;
2271 
2272   PetscFunctionBegin;
2273   if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2274   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2275 
2276   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2277   else     ltog = NULL;
2278   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2279   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2280 
2281   M         = mat->rmap->n/bs;
2282   cstart    = mat->cmap->rstart/bs;
2283   cend      = mat->cmap->rend/bs;
2284   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2285   c->N      = mat->cmap->N/bs;
2286   c->m      = mat->rmap->n/bs;
2287   c->rstart = mat->rmap->rstart/bs;
2288 
2289   c->ncolors = nis;
2290   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2291   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2292   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2293   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2294   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2295   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2296 
2297   /* Allow access to data structures of local part of matrix */
2298   if (!baij->colmap) {
2299     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2300   }
2301   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2302   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2303 
2304   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2305   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2306 
2307   for (i=0; i<nis; i++) {
2308     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2309     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2310 
2311     c->ncolumns[i] = n;
2312     if (n) {
2313       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2314       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2315       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2316     } else {
2317       c->columns[i] = 0;
2318     }
2319 
2320     if (ctype == IS_COLORING_GLOBAL) {
2321       /* Determine the total (parallel) number of columns of this color */
2322       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
2323       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2324 
2325       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2326       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2327       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2328       if (!nctot) {
2329         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2330       }
2331 
2332       disp[0] = 0;
2333       for (j=1; j<size; j++) {
2334         disp[j] = disp[j-1] + ncolsonproc[j-1];
2335       }
2336 
2337       /* Get complete list of columns for color on each processor */
2338       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2339       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2340       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2341     } else if (ctype == IS_COLORING_GHOSTED) {
2342       /* Determine local number of columns of this color on this process, including ghost points */
2343       nctot = n;
2344       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2345       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2346     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2347 
2348     /*
2349        Mark all rows affected by these columns
2350     */
2351     /* Temporary option to allow for debugging/testing */
2352     flg  = PETSC_FALSE;
2353     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2354     if (!flg) { /*-----------------------------------------------------------------------------*/
2355       /* crude, fast version */
2356       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2357       /* loop over columns */
2358       for (j=0; j<nctot; j++) {
2359         if (ctype == IS_COLORING_GHOSTED) {
2360           col = ltog[cols[j]];
2361         } else {
2362           col = cols[j];
2363         }
2364         if (col >= cstart && col < cend) {
2365           /* column is in diagonal block of matrix */
2366           rows = A_cj + A_ci[col-cstart];
2367           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2368         } else {
2369 #if defined(PETSC_USE_CTABLE)
2370           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2371           colb--;
2372 #else
2373           colb = baij->colmap[col] - 1;
2374 #endif
2375           if (colb == -1) {
2376             m = 0;
2377           } else {
2378             colb = colb/bs;
2379             rows = B_cj + B_ci[colb];
2380             m    = B_ci[colb+1] - B_ci[colb];
2381           }
2382         }
2383         /* loop over columns marking them in rowhit */
2384         for (k=0; k<m; k++) {
2385           rowhit[*rows++] = col + 1;
2386         }
2387       }
2388 
2389       /* count the number of hits */
2390       nrows = 0;
2391       for (j=0; j<M; j++) {
2392         if (rowhit[j]) nrows++;
2393       }
2394       c->nrows[i] = nrows;
2395       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2396       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2397       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2398       nrows       = 0;
2399       for (j=0; j<M; j++) {
2400         if (rowhit[j]) {
2401           c->rows[i][nrows]          = j;
2402           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2403           nrows++;
2404         }
2405       }
2406     } else { /*-------------------------------------------------------------------------------*/
2407       /* slow version, using rowhit as a linked list */
2408       PetscInt currentcol,fm,mfm;
2409       rowhit[M] = M;
2410       nrows     = 0;
2411       /* loop over columns */
2412       for (j=0; j<nctot; j++) {
2413         if (ctype == IS_COLORING_GHOSTED) {
2414           col = ltog[cols[j]];
2415         } else {
2416           col = cols[j];
2417         }
2418         if (col >= cstart && col < cend) {
2419           /* column is in diagonal block of matrix */
2420           rows = A_cj + A_ci[col-cstart];
2421           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2422         } else {
2423 #if defined(PETSC_USE_CTABLE)
2424           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2425           colb--;
2426 #else
2427           colb = baij->colmap[col] - 1;
2428 #endif
2429           if (colb == -1) {
2430             m = 0;
2431           } else {
2432             colb = colb/bs;
2433             rows = B_cj + B_ci[colb];
2434             m    = B_ci[colb+1] - B_ci[colb];
2435           }
2436         }
2437 
2438         /* loop over columns marking them in rowhit */
2439         fm = M;    /* fm points to first entry in linked list */
2440         for (k=0; k<m; k++) {
2441           currentcol = *rows++;
2442           /* is it already in the list? */
2443           do {
2444             mfm = fm;
2445             fm  = rowhit[fm];
2446           } while (fm < currentcol);
2447           /* not in list so add it */
2448           if (fm != currentcol) {
2449             nrows++;
2450             columnsforrow[currentcol] = col;
2451             /* next three lines insert new entry into linked list */
2452             rowhit[mfm]        = currentcol;
2453             rowhit[currentcol] = fm;
2454             fm                 = currentcol;
2455             /* fm points to present position in list since we know the columns are sorted */
2456           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2457         }
2458       }
2459       c->nrows[i] = nrows;
2460       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2461       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2462       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2463       /* now store the linked list of rows into c->rows[i] */
2464       nrows = 0;
2465       fm    = rowhit[M];
2466       do {
2467         c->rows[i][nrows]            = fm;
2468         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2469         fm                           = rowhit[fm];
2470       } while (fm < M);
2471     } /* ---------------------------------------------------------------------------------------*/
2472     ierr = PetscFree(cols);CHKERRQ(ierr);
2473   }
2474 
2475   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2476   /*
2477        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2478   */
2479   if (ctype == IS_COLORING_GLOBAL) {
2480     PetscInt *garray;
2481     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2482     for (i=0; i<baij->B->cmap->n/bs; i++) {
2483       for (j=0; j<bs; j++) {
2484         garray[i*bs+j] = bs*baij->garray[i]+j;
2485       }
2486     }
2487     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2488     ierr = PetscFree(garray);CHKERRQ(ierr);
2489     CHKMEMQ;
2490     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2491     for (k=0; k<c->ncolors; k++) {
2492       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2493       for (l=0; l<c->nrows[k]; l++) {
2494         col = c->columnsforrow[k][l];
2495         if (col >= cstart && col < cend) {
2496           /* column is in diagonal block of matrix */
2497           colb = col - cstart;
2498         } else {
2499           /* column  is in "off-processor" part */
2500 #if defined(PETSC_USE_CTABLE)
2501           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2502           colb--;
2503 #else
2504           colb = baij->colmap[col] - 1;
2505 #endif
2506           colb  = colb/bs;
2507           colb += cend - cstart;
2508         }
2509         c->vscaleforrow[k][l] = colb;
2510       }
2511     }
2512   } else if (ctype == IS_COLORING_GHOSTED) {
2513     /* Get gtol mapping */
2514     PetscInt N = mat->cmap->N,nlocal,*gtol;
2515     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2516     for (i=0; i<N; i++) gtol[i] = -1;
2517     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2518     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2519 
2520     c->vscale = 0; /* will be created in MatFDColoringApply() */
2521     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2522     for (k=0; k<c->ncolors; k++) {
2523       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2524       for (l=0; l<c->nrows[k]; l++) {
2525         col = c->columnsforrow[k][l];      /* global column index */
2526 
2527         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2528       }
2529     }
2530     ierr = PetscFree(gtol);CHKERRQ(ierr);
2531   }
2532   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2533 
2534   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2535   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2536   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2537   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2538   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2539   CHKMEMQ;
2540   PetscFunctionReturn(0);
2541 }
2542 
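/*
   Illustrative sketch: the coloring context built above is normally created
   from a matrix coloring and then used to assemble a finite-difference
   Jacobian. FormFunction and the helper name are placeholders; MATCOLORINGSL
   is one of several available coloring types.
*/
static PetscErrorCode ExampleFDColoringBAIJ(Mat J,PetscErrorCode (*FormFunction)(void))
{
  PetscErrorCode ierr;
  ISColoring     iscoloring;
  MatFDColoring  fdcoloring;

  PetscFunctionBegin;
  ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
  ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
  ierr = MatFDColoringSetFunction(fdcoloring,FormFunction,NULL);CHKERRQ(ierr);
  ierr = MatFDColoringSetFromOptions(fdcoloring);CHKERRQ(ierr);
  ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  ierr = MatFDColoringDestroy(&fdcoloring);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
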
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2545 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2546 {
2547   Mat            B;
2548   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2549   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2550   Mat_SeqAIJ     *b;
2551   PetscErrorCode ierr;
2552   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2553   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2554   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2555 
2556   PetscFunctionBegin;
2557   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2558   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2559 
2560   /* ----------------------------------------------------------------
2561      Tell every processor the number of nonzeros per row
2562   */
2563   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2564   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2565     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2566   }
2567   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2568   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2569   displs    = recvcounts + size;
2570   for (i=0; i<size; i++) {
2571     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2572     displs[i]     = A->rmap->range[i]/bs;
2573   }
2574 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2575   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2576 #else
2577   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2578 #endif
2579   /* ---------------------------------------------------------------
2580      Create the sequential matrix of the same type as the local block diagonal
2581   */
2582   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2583   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2584   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2585   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2586   b    = (Mat_SeqAIJ*)B->data;
2587 
2588   /*--------------------------------------------------------------------
2589     Copy my part of matrix column indices over
2590   */
2591   sendcount  = ad->nz + bd->nz;
2592   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2593   a_jsendbuf = ad->j;
2594   b_jsendbuf = bd->j;
2595   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2596   cnt        = 0;
2597   for (i=0; i<n; i++) {
2598 
2599     /* put in lower diagonal portion */
2600     m = bd->i[i+1] - bd->i[i];
2601     while (m > 0) {
2602       /* is it above diagonal (in bd (compressed) numbering) */
2603       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2604       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2605       m--;
2606     }
2607 
2608     /* put in diagonal portion */
2609     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2610       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2611     }
2612 
2613     /* put in upper diagonal portion */
2614     while (m-- > 0) {
2615       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2616     }
2617   }
2618   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2619 
2620   /*--------------------------------------------------------------------
2621     Gather all column indices to all processors
2622   */
2623   for (i=0; i<size; i++) {
2624     recvcounts[i] = 0;
2625     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2626       recvcounts[i] += lens[j];
2627     }
2628   }
2629   displs[0] = 0;
2630   for (i=1; i<size; i++) {
2631     displs[i] = displs[i-1] + recvcounts[i-1];
2632   }
2633 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2634   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2635 #else
2636   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2637 #endif
2638   /*--------------------------------------------------------------------
2639     Assemble the matrix into usable form (note numerical values not yet set)
2640   */
2641   /* set the b->ilen (length of each row) values */
2642   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2643   /* set the b->i indices */
2644   b->i[0] = 0;
2645   for (i=1; i<=A->rmap->N/bs; i++) {
2646     b->i[i] = b->i[i-1] + lens[i-1];
2647   }
2648   ierr = PetscFree(lens);CHKERRQ(ierr);
2649   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2652 
2653   if (A->symmetric) {
2654     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2655   } else if (A->hermitian) {
2656     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2657   } else if (A->structurally_symmetric) {
2658     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2659   }
2660   *newmat = B;
2661   PetscFunctionReturn(0);
2662 }
2663 
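/*
   Illustrative usage sketch (not part of the library source; the caller and the
   viewer call are assumptions): the routine above backs MatGetSeqNonzeroStructure(),
   which gives every process a sequential matrix holding the complete block
   nonzero structure gathered with the two MPI_Allgatherv() calls.

      Mat S;
      ierr = MatGetSeqNonzeroStructure(A,&S);CHKERRQ(ierr);   // same structure on every process
      ierr = MatView(S,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
      ierr = MatDestroySeqNonzeroStructure(&S);CHKERRQ(ierr);
*/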
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatSOR_MPIBAIJ"
2666 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2667 {
2668   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2669   PetscErrorCode ierr;
2670   Vec            bb1 = 0;
2671 
2672   PetscFunctionBegin;
2673   if (flag == SOR_APPLY_UPPER) {
2674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2675     PetscFunctionReturn(0);
2676   }
2677 
2678   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2679     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2680   }
2681 
2682   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2683     if (flag & SOR_ZERO_INITIAL_GUESS) {
2684       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2685       its--;
2686     }
2687 
2688     while (its--) {
2689       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691 
2692       /* update rhs: bb1 = bb - B*x */
2693       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2694       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2695 
2696       /* local sweep */
2697       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2698     }
2699   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2700     if (flag & SOR_ZERO_INITIAL_GUESS) {
2701       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2702       its--;
2703     }
2704     while (its--) {
2705       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707 
2708       /* update rhs: bb1 = bb - B*x */
2709       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2710       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2711 
2712       /* local sweep */
2713       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2714     }
2715   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2716     if (flag & SOR_ZERO_INITIAL_GUESS) {
2717       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2718       its--;
2719     }
2720     while (its--) {
2721       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723 
2724       /* update rhs: bb1 = bb - B*x */
2725       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2726       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2727 
2728       /* local sweep */
2729       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2730     }
2731   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2732 
2733   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
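/*
   Illustrative sketch (an assumption, not taken from this file): MatSOR_MPIBAIJ()
   is normally reached through PCSOR, e.g. "-ksp_type richardson -pc_type sor
   -pc_sor_local_symmetric", but it can also be driven directly.  Each outer
   iteration scatters the current solution, forms bb1 = bb - B*x for the
   off-process coupling, and then runs the local sweep.

      ierr = VecSet(x,0.0);CHKERRQ(ierr);
      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,5,1,x);CHKERRQ(ierr);
*/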
2737 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2741 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2742 {
2743   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
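/*
   Illustrative sketch (assumption): MatInvertBlockDiagonal() returns the bs x bs
   inverses of the diagonal blocks stored contiguously block by block; this is
   what point-block Jacobi (-pc_type pbjacobi) consumes.  The array is owned by
   the matrix and must not be freed by the caller.

      const PetscScalar *idiag;
      PetscInt          bs;
      ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
      ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);  // idiag + r*bs*bs is the inverse of block row r's diagonal block
*/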
2751 
2752 /* -------------------------------------------------------------------*/
2753 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2754                                        MatGetRow_MPIBAIJ,
2755                                        MatRestoreRow_MPIBAIJ,
2756                                        MatMult_MPIBAIJ,
2757                                 /* 4*/ MatMultAdd_MPIBAIJ,
2758                                        MatMultTranspose_MPIBAIJ,
2759                                        MatMultTransposeAdd_MPIBAIJ,
2760                                        0,
2761                                        0,
2762                                        0,
2763                                 /*10*/ 0,
2764                                        0,
2765                                        0,
2766                                        MatSOR_MPIBAIJ,
2767                                        MatTranspose_MPIBAIJ,
2768                                 /*15*/ MatGetInfo_MPIBAIJ,
2769                                        MatEqual_MPIBAIJ,
2770                                        MatGetDiagonal_MPIBAIJ,
2771                                        MatDiagonalScale_MPIBAIJ,
2772                                        MatNorm_MPIBAIJ,
2773                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2774                                        MatAssemblyEnd_MPIBAIJ,
2775                                        MatSetOption_MPIBAIJ,
2776                                        MatZeroEntries_MPIBAIJ,
2777                                 /*24*/ MatZeroRows_MPIBAIJ,
2778                                        0,
2779                                        0,
2780                                        0,
2781                                        0,
2782                                 /*29*/ MatSetUp_MPIBAIJ,
2783                                        0,
2784                                        0,
2785                                        0,
2786                                        0,
2787                                 /*34*/ MatDuplicate_MPIBAIJ,
2788                                        0,
2789                                        0,
2790                                        0,
2791                                        0,
2792                                 /*39*/ MatAXPY_MPIBAIJ,
2793                                        MatGetSubMatrices_MPIBAIJ,
2794                                        MatIncreaseOverlap_MPIBAIJ,
2795                                        MatGetValues_MPIBAIJ,
2796                                        MatCopy_MPIBAIJ,
2797                                 /*44*/ 0,
2798                                        MatScale_MPIBAIJ,
2799                                        0,
2800                                        0,
2801                                        0,
2802                                 /*49*/ 0,
2803                                        0,
2804                                        0,
2805                                        0,
2806                                        0,
2807                                 /*54*/ MatFDColoringCreate_MPIBAIJ,
2808                                        0,
2809                                        MatSetUnfactored_MPIBAIJ,
2810                                        MatPermute_MPIBAIJ,
2811                                        MatSetValuesBlocked_MPIBAIJ,
2812                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2813                                        MatDestroy_MPIBAIJ,
2814                                        MatView_MPIBAIJ,
2815                                        0,
2816                                        0,
2817                                 /*64*/ 0,
2818                                        0,
2819                                        0,
2820                                        0,
2821                                        0,
2822                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2823                                        0,
2824                                        0,
2825                                        0,
2826                                        0,
2827                                 /*74*/ 0,
2828                                        MatFDColoringApply_BAIJ,
2829                                        0,
2830                                        0,
2831                                        0,
2832                                 /*79*/ 0,
2833                                        0,
2834                                        0,
2835                                        0,
2836                                        MatLoad_MPIBAIJ,
2837                                 /*84*/ 0,
2838                                        0,
2839                                        0,
2840                                        0,
2841                                        0,
2842                                 /*89*/ 0,
2843                                        0,
2844                                        0,
2845                                        0,
2846                                        0,
2847                                 /*94*/ 0,
2848                                        0,
2849                                        0,
2850                                        0,
2851                                        0,
2852                                 /*99*/ 0,
2853                                        0,
2854                                        0,
2855                                        0,
2856                                        0,
2857                                 /*104*/0,
2858                                        MatRealPart_MPIBAIJ,
2859                                        MatImaginaryPart_MPIBAIJ,
2860                                        0,
2861                                        0,
2862                                 /*109*/0,
2863                                        0,
2864                                        0,
2865                                        0,
2866                                        0,
2867                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2868                                        0,
2869                                        MatGetGhosts_MPIBAIJ,
2870                                        0,
2871                                        0,
2872                                 /*119*/0,
2873                                        0,
2874                                        0,
2875                                        0,
2876                                        0,
2877                                 /*124*/0,
2878                                        0,
2879                                        MatInvertBlockDiagonal_MPIBAIJ,
2880                                        0,
2881                                        0,
2882                                /*129*/ 0,
2883                                        0,
2884                                        0,
2885                                        0,
2886                                        0,
2887                                /*134*/ 0,
2888                                        0,
2889                                        0,
2890                                        0,
2891                                        0,
2892                                /*139*/ 0,
2893                                        0
2894 };
2895 
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2898 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2899 {
2900   PetscFunctionBegin;
2901   *a = ((Mat_MPIBAIJ*)A->data)->A;
2902   PetscFunctionReturn(0);
2903 }
2904 
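/*
   Illustrative sketch (assumption): the public interface is MatGetDiagonalBlock(),
   which for MATMPIBAIJ returns the sequential BAIJ matrix a->A above as a
   borrowed reference (do not destroy it).

      Mat Ad;
      ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
      ierr = MatView(Ad,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/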
2905 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2906 
2907 #undef __FUNCT__
2908 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2909 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2910 {
2911   PetscInt       m,rstart,cstart,cend;
2912   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2913   const PetscInt *JJ    =0;
2914   PetscScalar    *values=0;
2915   PetscErrorCode ierr;
2916 
2917   PetscFunctionBegin;
2918   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2919   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2920   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2921   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2922   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2923   m      = B->rmap->n/bs;
2924   rstart = B->rmap->rstart/bs;
2925   cstart = B->cmap->rstart/bs;
2926   cend   = B->cmap->rend/bs;
2927 
2928   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2929   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2930   for (i=0; i<m; i++) {
2931     nz = ii[i+1] - ii[i];
2932     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2933     nz_max = PetscMax(nz_max,nz);
2934     JJ     = jj + ii[i];
2935     for (j=0; j<nz; j++) {
2936       if (*JJ >= cstart) break;
2937       JJ++;
2938     }
2939     d = 0;
2940     for (; j<nz; j++) {
2941       if (*JJ++ >= cend) break;
2942       d++;
2943     }
2944     d_nnz[i] = d;
2945     o_nnz[i] = nz - d;
2946   }
2947   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2948   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2949 
2950   values = (PetscScalar*)V;
2951   if (!values) {
2952     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2953     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2954   }
2955   for (i=0; i<m; i++) {
2956     PetscInt          row    = i + rstart;
2957     PetscInt          ncols  = ii[i+1] - ii[i];
2958     const PetscInt    *icols = jj + ii[i];
2959     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2960     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2961   }
2962 
2963   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2964   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2965   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2966   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 #undef __FUNCT__
2971 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2972 /*@C
2973    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2974    (the default parallel PETSc format).
2975 
2976    Collective on MPI_Comm
2977 
2978    Input Parameters:
2979 +  B - the matrix
2980 .  bs - the block size
2981 .  i - the indices into j for the start of each local row (starts with zero)
2982 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2983 -  v - optional values in the matrix
2984 
2985    Level: developer
2986 
2987 .keywords: matrix, aij, compressed row, sparse, parallel
2988 
2989 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2990 @*/
2991 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2992 {
2993   PetscErrorCode ierr;
2994 
2995   PetscFunctionBegin;
2996   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2997   PetscValidType(B,1);
2998   PetscValidLogicalCollectiveInt(B,bs,2);
2999   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3000   PetscFunctionReturn(0);
3001 }
3002 
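/*
   Illustrative sketch (the block CSR arrays and sizes are assumptions for the
   example): preallocating and zero-filling a block-diagonal MATMPIBAIJ matrix
   with bs = 2 and two block rows per process, one block per block row.

      Mat         A;
      PetscMPIInt rank;
      PetscInt    ii[3] = {0,1,2};                 // block row pointers
      PetscInt    jj[2];                           // global block column of each block
      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      jj[0] = 2*rank; jj[1] = 2*rank + 1;          // the two diagonal blocks owned locally
      ierr  = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr  = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr  = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr  = MatMPIBAIJSetPreallocationCSR(A,2,ii,jj,NULL);CHKERRQ(ierr);  // NULL values: blocks set to zero
*/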
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3005 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
3006 {
3007   Mat_MPIBAIJ    *b;
3008   PetscErrorCode ierr;
3009   PetscInt       i;
3010 
3011   PetscFunctionBegin;
3012   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3013   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3014   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3015   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3016   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3017 
3018   if (d_nnz) {
3019     for (i=0; i<B->rmap->n/bs; i++) {
3020       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3021     }
3022   }
3023   if (o_nnz) {
3024     for (i=0; i<B->rmap->n/bs; i++) {
3025       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3026     }
3027   }
3028 
3029   b      = (Mat_MPIBAIJ*)B->data;
3030   b->bs2 = bs*bs;
3031   b->mbs = B->rmap->n/bs;
3032   b->nbs = B->cmap->n/bs;
3033   b->Mbs = B->rmap->N/bs;
3034   b->Nbs = B->cmap->N/bs;
3035 
3036   for (i=0; i<=b->size; i++) {
3037     b->rangebs[i] = B->rmap->range[i]/bs;
3038   }
3039   b->rstartbs = B->rmap->rstart/bs;
3040   b->rendbs   = B->rmap->rend/bs;
3041   b->cstartbs = B->cmap->rstart/bs;
3042   b->cendbs   = B->cmap->rend/bs;
3043 
3044   if (!B->preallocated) {
3045     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3046     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3047     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3048     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3049     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3050     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3051     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3052     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3053     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3054   }
3055 
3056   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3057   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3058   B->preallocated = PETSC_TRUE;
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3063 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3067 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3068 {
3069   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3070   PetscErrorCode ierr;
3071   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3072   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3073   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3074 
3075   PetscFunctionBegin;
3076   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3077   ii[0] = 0;
3078   CHKMEMQ;
3079   for (i=0; i<M; i++) {
3080     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3081     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3082     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3083     /* remove one from the count if the row contains a diagonal entry */
3084     for (j=id[i]; j<id[i+1]; j++) {
3085       if (jd[j] == i) {ii[i+1]--;break;}
3086     }
3087     CHKMEMQ;
3088   }
3089   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3090   cnt  = 0;
3091   for (i=0; i<M; i++) {
3092     for (j=io[i]; j<io[i+1]; j++) {
3093       if (garray[jo[j]] > rstart) break;
3094       jj[cnt++] = garray[jo[j]];
3095       CHKMEMQ;
3096     }
3097     for (k=id[i]; k<id[i+1]; k++) {
3098       if (jd[k] != i) {
3099         jj[cnt++] = rstart + jd[k];
3100         CHKMEMQ;
3101       }
3102     }
3103     for (; j<io[i+1]; j++) {
3104       jj[cnt++] = garray[jo[j]];
3105       CHKMEMQ;
3106     }
3107   }
3108   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3109   PetscFunctionReturn(0);
3110 }
3111 
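/*
   Illustrative sketch (assumption): the MPIAdj conversion above is typically
   used to hand the block connectivity graph to a partitioner.

      Mat             adj;
      MatPartitioning part;
      IS              is;
      ierr = MatConvert(A,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr);
      ierr = MatPartitioningCreate(PetscObjectComm((PetscObject)A),&part);CHKERRQ(ierr);
      ierr = MatPartitioningSetAdjacency(part,adj);CHKERRQ(ierr);
      ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
      ierr = MatPartitioningApply(part,&is);CHKERRQ(ierr);    // is[i] = new owner of block row i
      ierr = ISDestroy(&is);CHKERRQ(ierr);
      ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
      ierr = MatDestroy(&adj);CHKERRQ(ierr);
*/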
3112 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3113 
3114 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3115 
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3118 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3119 {
3120   PetscErrorCode ierr;
3121   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3122   Mat            B;
3123   Mat_MPIAIJ     *b;
3124 
3125   PetscFunctionBegin;
3126   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3127 
3128   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3129   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3130   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3131   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3132   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3133   b    = (Mat_MPIAIJ*) B->data;
3134 
3135   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3136   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3137   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3138   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3139   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3140   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3144   if (reuse == MAT_REUSE_MATRIX) {
3145     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3146   } else {
3147    *newmat = B;
3148   }
3149   PetscFunctionReturn(0);
3150 }
3151 
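/*
   Illustrative sketch (assumption): converting a block matrix to point AIJ
   format, e.g. for a solver that requires MATMPIAIJ.  Note that in the routine
   above MAT_REUSE_MATRIX replaces A itself via MatHeaderReplace(), so the
   in-place form passes &A as the result.

      Mat Aaij;
      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);CHKERRQ(ierr);
*/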
3152 #if defined(PETSC_HAVE_MUMPS)
3153 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3154 #endif
3155 
3156 /*MC
3157    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3158 
3159    Options Database Keys:
3160 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3161 . -mat_block_size <bs> - set the blocksize used to store the matrix
3162 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, with the table size scaled by <fact>
3163 
3164   Level: beginner
3165 
3166 .seealso: MatCreateMPIBAIJ
3167 M*/
3168 
3169 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatCreate_MPIBAIJ"
3173 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3174 {
3175   Mat_MPIBAIJ    *b;
3176   PetscErrorCode ierr;
3177   PetscBool      flg;
3178 
3179   PetscFunctionBegin;
3180   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3181   B->data = (void*)b;
3182 
3183   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3184   B->assembled = PETSC_FALSE;
3185 
3186   B->insertmode = NOT_SET_VALUES;
3187   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3188   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3189 
3190   /* build local table of row and column ownerships */
3191   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3192 
3193   /* build cache for off array entries formed */
3194   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3195 
3196   b->donotstash  = PETSC_FALSE;
3197   b->colmap      = NULL;
3198   b->garray      = NULL;
3199   b->roworiented = PETSC_TRUE;
3200 
3201   /* stuff used in block assembly */
3202   b->barray = 0;
3203 
3204   /* stuff used for matrix vector multiply */
3205   b->lvec  = 0;
3206   b->Mvctx = 0;
3207 
3208   /* stuff for MatGetRow() */
3209   b->rowindices   = 0;
3210   b->rowvalues    = 0;
3211   b->getrowactive = PETSC_FALSE;
3212 
3213   /* hash table stuff */
3214   b->ht           = 0;
3215   b->hd           = 0;
3216   b->ht_size      = 0;
3217   b->ht_flag      = PETSC_FALSE;
3218   b->ht_fact      = 0;
3219   b->ht_total_ct  = 0;
3220   b->ht_insert_ct = 0;
3221 
3222   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3223   b->ijonly = PETSC_FALSE;
3224 
3225   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3226   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3227   if (flg) {
3228     PetscReal fact = 1.39;
3229     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3230     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3231     if (fact <= 1.0) fact = 1.39;
3232     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3233     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3234   }
3235   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3236 
3237 #if defined(PETSC_HAVE_MUMPS)
3238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3239 #endif
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3241   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3242   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3243   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3244   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3251   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 /*MC
3256    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3257 
3258    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3259    and MATMPIBAIJ otherwise.
3260 
3261    Options Database Keys:
3262 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3263 
3264   Level: beginner
3265 
3266 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3267 M*/
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3271 /*@C
3272    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3273    (block compressed row).  For good matrix assembly performance
3274    the user should preallocate the matrix storage by setting the parameters
3275    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3276    performance can be increased by more than a factor of 50.
3277 
3278    Collective on Mat
3279 
3280    Input Parameters:
3281 +  A - the matrix
3282 .  bs   - size of block
3283 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3284            submatrix  (same for all local rows)
3285 .  d_nnz - array containing the number of block nonzeros in the various block rows
3286            of the diagonal portion of the local submatrix (possibly different for each block
3287            row), or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3288            set it even if it is zero.
3289 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3290            submatrix (same for all local rows).
3291 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3292            off-diagonal portion of the local submatrix (possibly different for
3293            each block row) or NULL.
3294 
3295    If the *_nnz parameter is given then the *_nz parameter is ignored
3296 
3297    Options Database Keys:
3298 +   -mat_block_size - size of the blocks to use
3299 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with the table size scaled by <fact>
3300 
3301    Notes:
3302    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3303    then it must be used on all processors that share the object for that argument.
3304 
3305    Storage Information:
3306    For a square global matrix we define each processor's diagonal portion
3307    to be its local rows and the corresponding columns (a square submatrix);
3308    each processor's off-diagonal portion encompasses the remainder of the
3309    local matrix (a rectangular submatrix).
3310 
3311    The user can specify preallocated storage for the diagonal part of
3312    the local submatrix with either d_nz or d_nnz (not both).  Set
3313    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3314    memory allocation.  Likewise, specify preallocated storage for the
3315    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3316 
3317    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3318    the figure below we depict these three local rows and all columns (0-11).
3319 
3320 .vb
3321            0 1 2 3 4 5 6 7 8 9 10 11
3322           -------------------
3323    row 3  |  o o o d d d o o o o o o
3324    row 4  |  o o o d d d o o o o o o
3325    row 5  |  o o o d d d o o o o o o
3326           -------------------
3327 .ve
3328 
3329    Thus, any entries in the d locations are stored in the d (diagonal)
3330    submatrix, and any entries in the o locations are stored in the
3331    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3332    stored simply in the MATSEQBAIJ format for compressed row storage.
3333 
3334    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3335    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3336    In general, for PDE problems in which most nonzeros are near the diagonal,
3337    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3338    or you will get TERRIBLE performance; see the users' manual chapter on
3339    matrices.
3340 
3341    You can call MatGetInfo() to get information on how effective the preallocation was;
3342    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3343    You can also run with the option -info and look for messages with the string
3344    malloc in them to see if additional memory allocation was needed.
3345 
3346    Level: intermediate
3347 
3348 .keywords: matrix, block, aij, compressed row, sparse, parallel
3349 
3350 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3351 @*/
3352 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3353 {
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3358   PetscValidType(B,1);
3359   PetscValidLogicalCollectiveInt(B,bs,2);
3360   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3361   PetscFunctionReturn(0);
3362 }
3363 
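/*
   Illustrative sketch (sizes and fill estimates are assumptions): typical
   preallocation with bs = 3, at most 3 blocks per block row in the diagonal
   part and 2 in the off-diagonal part, the same for every block row.

      Mat      A;
      PetscInt mb = 10;                            // block rows per process
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3*mb,3*mb,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,3,3,NULL,2,NULL);CHKERRQ(ierr);
      // ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/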
3364 #undef __FUNCT__
3365 #define __FUNCT__ "MatCreateBAIJ"
3366 /*@C
3367    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3368    (block compressed row).  For good matrix assembly performance
3369    the user should preallocate the matrix storage by setting the parameters
3370    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3371    performance can be increased by more than a factor of 50.
3372 
3373    Collective on MPI_Comm
3374 
3375    Input Parameters:
3376 +  comm - MPI communicator
3377 .  bs   - size of block
3378 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3379            This value should be the same as the local size used in creating the
3380            y vector for the matrix-vector product y = Ax.
3381 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3382            This value should be the same as the local size used in creating the
3383            x vector for the matrix-vector product y = Ax.
3384 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3385 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3386 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3387            submatrix  (same for all local rows)
3388 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3389            of the diagonal portion of the local submatrix (possibly different for each block
3390            row), or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3391            and set it even if it is zero.
3392 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3393            submatrix (same for all local rows).
3394 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3395            off-diagonal portion of the local submatrix (possibly different for
3396            each block row) or NULL.
3397 
3398    Output Parameter:
3399 .  A - the matrix
3400 
3401    Options Database Keys:
3402 +   -mat_block_size - size of the blocks to use
3403 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with the table size scaled by <fact>
3404 
3405    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3406    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3407    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3408 
3409    Notes:
3410    If the *_nnz parameter is given then the *_nz parameter is ignored
3411 
3412    A nonzero block is any block that has 1 or more nonzeros in it
3413 
3414    The user MUST specify either the local or global matrix dimensions
3415    (possibly both).
3416 
3417    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3418    then it must be used on all processors that share the object for that argument.
3419 
3420    Storage Information:
3421    For a square global matrix we define each processor's diagonal portion
3422    to be its local rows and the corresponding columns (a square submatrix);
3423    each processor's off-diagonal portion encompasses the remainder of the
3424    local matrix (a rectangular submatrix).
3425 
3426    The user can specify preallocated storage for the diagonal part of
3427    the local submatrix with either d_nz or d_nnz (not both).  Set
3428    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3429    memory allocation.  Likewise, specify preallocated storage for the
3430    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3431 
3432    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3433    the figure below we depict these three local rows and all columns (0-11).
3434 
3435 .vb
3436            0 1 2 3 4 5 6 7 8 9 10 11
3437           -------------------
3438    row 3  |  o o o d d d o o o o o o
3439    row 4  |  o o o d d d o o o o o o
3440    row 5  |  o o o d d d o o o o o o
3441           -------------------
3442 .ve
3443 
3444    Thus, any entries in the d locations are stored in the d (diagonal)
3445    submatrix, and any entries in the o locations are stored in the
3446    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3447    stored simply in the MATSEQBAIJ format for compressed row storage.
3448 
3449    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3450    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3451    In general, for PDE problems in which most nonzeros are near the diagonal,
3452    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3453    or you will get TERRIBLE performance; see the users' manual chapter on
3454    matrices.
3455 
3456    Level: intermediate
3457 
3458 .keywords: matrix, block, aij, compressed row, sparse, parallel
3459 
3460 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3461 @*/
3462 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3463 {
3464   PetscErrorCode ierr;
3465   PetscMPIInt    size;
3466 
3467   PetscFunctionBegin;
3468   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3469   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3470   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3471   if (size > 1) {
3472     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3473     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3474   } else {
3475     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3476     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3477   }
3478   PetscFunctionReturn(0);
3479 }
3480 
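/*
   Illustrative sketch (assumption): the one-call creation path, equivalent to
   the MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation() sequence.

      Mat      A;
      PetscInt mb = 10;                            // block rows per process, bs = 2
      ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,2*mb,2*mb,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A);CHKERRQ(ierr);
*/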
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3483 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3484 {
3485   Mat            mat;
3486   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3487   PetscErrorCode ierr;
3488   PetscInt       len=0;
3489 
3490   PetscFunctionBegin;
3491   *newmat = 0;
3492   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3493   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3494   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3495   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3496 
3497   mat->factortype   = matin->factortype;
3498   mat->preallocated = PETSC_TRUE;
3499   mat->assembled    = PETSC_TRUE;
3500   mat->insertmode   = NOT_SET_VALUES;
3501 
3502   a             = (Mat_MPIBAIJ*)mat->data;
3503   mat->rmap->bs = matin->rmap->bs;
3504   a->bs2        = oldmat->bs2;
3505   a->mbs        = oldmat->mbs;
3506   a->nbs        = oldmat->nbs;
3507   a->Mbs        = oldmat->Mbs;
3508   a->Nbs        = oldmat->Nbs;
3509 
3510   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3511   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3512 
3513   a->size         = oldmat->size;
3514   a->rank         = oldmat->rank;
3515   a->donotstash   = oldmat->donotstash;
3516   a->roworiented  = oldmat->roworiented;
3517   a->rowindices   = 0;
3518   a->rowvalues    = 0;
3519   a->getrowactive = PETSC_FALSE;
3520   a->barray       = 0;
3521   a->rstartbs     = oldmat->rstartbs;
3522   a->rendbs       = oldmat->rendbs;
3523   a->cstartbs     = oldmat->cstartbs;
3524   a->cendbs       = oldmat->cendbs;
3525 
3526   /* hash table stuff */
3527   a->ht           = 0;
3528   a->hd           = 0;
3529   a->ht_size      = 0;
3530   a->ht_flag      = oldmat->ht_flag;
3531   a->ht_fact      = oldmat->ht_fact;
3532   a->ht_total_ct  = 0;
3533   a->ht_insert_ct = 0;
3534 
3535   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3536   if (oldmat->colmap) {
3537 #if defined(PETSC_USE_CTABLE)
3538     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3539 #else
3540     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3541     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3542     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3543 #endif
3544   } else a->colmap = 0;
3545 
3546   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3547     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3548     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3549     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3550   } else a->garray = 0;
3551 
3552   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3553   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3554   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3555   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3556   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3557 
3558   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3559   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3560   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3561   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3562   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3563   *newmat = mat;
3564   PetscFunctionReturn(0);
3565 }
3566 
3567 #undef __FUNCT__
3568 #define __FUNCT__ "MatLoad_MPIBAIJ"
3569 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3570 {
3571   PetscErrorCode ierr;
3572   int            fd;
3573   PetscInt       i,nz,j,rstart,rend;
3574   PetscScalar    *vals,*buf;
3575   MPI_Comm       comm;
3576   MPI_Status     status;
3577   PetscMPIInt    rank,size,maxnz;
3578   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3579   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3580   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3581   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3582   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3583   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3584 
3585   PetscFunctionBegin;
3586   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3587   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3588   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3589   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3590 
3591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3592   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3593   if (!rank) {
3594     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3595     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3596     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3597   }
3598 
3599   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3600 
3601   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3602   M    = header[1]; N = header[2];
3603 
3604   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3605   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3606   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3607 
3608   /* If global sizes are set, check if they are consistent with that given in the file */
3609   if (sizesset) {
3610     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3611   }
3612   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3613   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3614 
3615   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3616 
3617   /*
3618      This code adds extra rows to make sure the number of rows is
3619      divisible by the blocksize
3620   */
3621   Mbs        = M/bs;
3622   extra_rows = bs - M + bs*Mbs;
3623   if (extra_rows == bs) extra_rows = 0;
3624   else                  Mbs++;
3625   if (extra_rows && !rank) {
3626     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3627   }
3628 
3629   /* determine ownership of all rows */
3630   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3631     mbs = Mbs/size + ((Mbs % size) > rank);
3632     m   = mbs*bs;
3633   } else { /* User set */
3634     m   = newmat->rmap->n;
3635     mbs = m/bs;
3636   }
3637   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3638   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3639 
3640   /* process 0 needs enough room for the process with the most rows */
3641   if (!rank) {
3642     mmax = rowners[1];
3643     for (i=2; i<=size; i++) {
3644       mmax = PetscMax(mmax,rowners[i]);
3645     }
3646     mmax*=bs;
3647   } else mmax = -1;             /* unused, but compiler warns anyway */
3648 
3649   rowners[0] = 0;
3650   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3651   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3652   rstart = rowners[rank];
3653   rend   = rowners[rank+1];
3654 
3655   /* distribute row lengths to all processors */
3656   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3657   if (!rank) {
3658     mend = m;
3659     if (size == 1) mend = mend - extra_rows;
3660     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3661     for (j=mend; j<m; j++) locrowlens[j] = 1;
3662     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3663     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3664     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3665     for (j=0; j<m; j++) {
3666       procsnz[0] += locrowlens[j];
3667     }
3668     for (i=1; i<size; i++) {
3669       mend = browners[i+1] - browners[i];
3670       if (i == size-1) mend = mend - extra_rows;
3671       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3672       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3673       /* calculate the number of nonzeros on each processor */
3674       for (j=0; j<browners[i+1]-browners[i]; j++) {
3675         procsnz[i] += rowlengths[j];
3676       }
3677       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3678     }
3679     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3680   } else {
3681     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3682   }
3683 
3684   if (!rank) {
3685     /* determine max buffer needed and allocate it */
3686     maxnz = procsnz[0];
3687     for (i=1; i<size; i++) {
3688       maxnz = PetscMax(maxnz,procsnz[i]);
3689     }
3690     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3691 
3692     /* read in my part of the matrix column indices  */
3693     nz     = procsnz[0];
3694     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3695     mycols = ibuf;
3696     if (size == 1) nz -= extra_rows;
3697     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3698     if (size == 1) {
3699       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3700     }
3701 
3702     /* read in everyone else's parts (except the last process's) and ship them off */
3703     for (i=1; i<size-1; i++) {
3704       nz   = procsnz[i];
3705       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3706       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3707     }
3708     /* read in the part for the last process */
3709     if (size != 1) {
3710       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3711       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3712       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3713       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3714     }
3715     ierr = PetscFree(cols);CHKERRQ(ierr);
3716   } else {
3717     /* determine buffer space needed for message */
3718     nz = 0;
3719     for (i=0; i<m; i++) {
3720       nz += locrowlens[i];
3721     }
3722     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3723     mycols = ibuf;
3724     /* receive message of column indices */
3725     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3726     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3727     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Received message length does not match the expected number of nonzeros");
3728   }
3729 
3730   /* loop over local rows, determining number of off diagonal entries */
3731   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3732   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3733   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3734   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3735   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3736   rowcount = 0; nzcount = 0;
3737   for (i=0; i<mbs; i++) {
3738     dcount  = 0;
3739     odcount = 0;
3740     for (j=0; j<bs; j++) {
3741       kmax = locrowlens[rowcount];
3742       for (k=0; k<kmax; k++) {
3743         tmp = mycols[nzcount++]/bs;
3744         if (!mask[tmp]) {
3745           mask[tmp] = 1;
3746           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3747           else masked1[dcount++] = tmp;
3748         }
3749       }
3750       rowcount++;
3751     }
3752 
3753     dlens[i]  = dcount;
3754     odlens[i] = odcount;
3755 
3756     /* zero out the mask elements we set */
3757     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3758     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3759   }
3760 
3761 
3762   if (!sizesset) {
3763     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3764   }
3765   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3766 
3767   if (!rank) {
3768     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3769     /* read in my part of the matrix numerical values  */
3770     nz     = procsnz[0];
3771     vals   = buf;
3772     mycols = ibuf;
3773     if (size == 1) nz -= extra_rows;
3774     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3775     if (size == 1) {
3776       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3777     }
3778 
3779     /* insert into matrix */
3780     jj = rstart*bs;
3781     for (i=0; i<m; i++) {
3782       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3783       mycols += locrowlens[i];
3784       vals   += locrowlens[i];
3785       jj++;
3786     }
3787     /* read in the other processes' values (except the last one) and ship them out */
3788     for (i=1; i<size-1; i++) {
3789       nz   = procsnz[i];
3790       vals = buf;
3791       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3792       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3793     }
3794     /* the last proc */
3795     if (size != 1) {
3796       nz   = procsnz[i] - extra_rows;
3797       vals = buf;
3798       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3799       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3800       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3801     }
3802     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3803   } else {
3804     /* receive numeric values */
3805     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3806 
3807     /* receive message of values */
3808     vals   = buf;
3809     mycols = ibuf;
3810     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3811 
3812     /* insert into matrix */
3813     jj = rstart*bs;
3814     for (i=0; i<m; i++) {
3815       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3816       mycols += locrowlens[i];
3817       vals   += locrowlens[i];
3818       jj++;
3819     }
3820   }
3821   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3822   ierr = PetscFree(buf);CHKERRQ(ierr);
3823   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3824   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3825   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3826   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3827   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3828   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   PetscFunctionReturn(0);
3830 }
3831 
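/*
   Illustrative sketch (the file name is an assumption): loading a MATMPIBAIJ
   matrix from a PETSc binary file; the block size is taken from
   -matload_block_size (default 1, see the options block above).

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/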
3832 #undef __FUNCT__
3833 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3834 /*@
3835    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3836 
3837    Input Parameters:
3838 +  mat  - the matrix
3839 -  fact - factor
3840 
3841    Not Collective, each process can use a different factor
3842 
3843    Level: advanced
3844 
3845   Notes:
3846    This can also be set by the command line option: -mat_use_hash_table <fact>
3847 
3848 .keywords: matrix, hashtable, factor, HT
3849 
3850 .seealso: MatSetOption()
3851 @*/
3852 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3853 {
3854   PetscErrorCode ierr;
3855 
3856   PetscFunctionBegin;
3857   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 
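/*
   Illustrative sketch (the factor value is an assumption): enabling the
   hash-table assembly path and then tuning the table-size factor.

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/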
3861 #undef __FUNCT__
3862 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3863 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3864 {
3865   Mat_MPIBAIJ *baij;
3866 
3867   PetscFunctionBegin;
3868   baij          = (Mat_MPIBAIJ*)mat->data;
3869   baij->ht_fact = fact;
3870   PetscFunctionReturn(0);
3871 }
3872 
3873 #undef __FUNCT__
3874 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3875 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3876 {
3877   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3878 
3879   PetscFunctionBegin;
3880   *Ad     = a->A;
3881   *Ao     = a->B;
3882   *colmap = a->garray;
3883   PetscFunctionReturn(0);
3884 }
3885 
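/*
   Illustrative sketch (assumption): examining the two local pieces and the
   column map returned above; all three are borrowed references and must not
   be destroyed or freed by the caller.

      Mat            Ad,Ao;
      const PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
      // colmap[j] is the global block column of local block column j of Ao
*/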
3886 /*
3887     Special version for direct calls from Fortran (to eliminate two function-call overheads)
3888 */
3889 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3890 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3891 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3892 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3893 #endif
3894 
3895 #undef __FUNCT__
3896 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3897 /*@C
3898   MatMPIBAIJSetValuesBlocked - Direct Fortran entry point used in place of MatSetValuesBlocked() to avoid the extra function-call overhead
3899 
3900   Collective on Mat
3901 
3902   Input Parameters:
3903 + mat - the matrix
3904 . min - number of input rows
3905 . im - input rows
3906 . nin - number of input columns
3907 . in - input columns
3908 . v - numerical values input
3909 - addvin - INSERT_VALUES or ADD_VALUES
3910 
3911   Notes: This routine contains a complete copy of the body of MatSetValuesBlocked_MPIBAIJ(); the duplication is unfortunate, but it eliminates two levels of function-call overhead for the Fortran interface.
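
  For reference, the standard C call that this stub short-circuits looks as
  follows (a sketch; a single 2x2 block with bs = 2 and an existing matrix mat
  are assumed):
.vb
  PetscInt    row = 0,col = 0;
  PetscScalar v[] = {1.0,2.0,3.0,4.0};
  ierr = MatSetValuesBlocked(mat,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
.ve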
3912 
3913   Level: advanced
3914 
3915 .seealso:   MatSetValuesBlocked()
3916 @*/
3917 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3918 {
3919   /* convert input arguments to C version */
3920   Mat        mat  = *matin;
3921   PetscInt   m    = *min, n = *nin;
3922   InsertMode addv = *addvin;
3923 
3924   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3925   const MatScalar *value;
3926   MatScalar       *barray     = baij->barray;
3927   PetscBool       roworiented = baij->roworiented;
3928   PetscErrorCode  ierr;
3929   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3930   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3931   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3932 
3933   PetscFunctionBegin;
3934   /* tasks normally handled by MatSetValuesBlocked() */
3935   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3936 #if defined(PETSC_USE_DEBUG)
3937   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3938   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3939 #endif
3940   if (mat->assembled) {
3941     mat->was_assembled = PETSC_TRUE;
3942     mat->assembled     = PETSC_FALSE;
3943   }
3944   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3945 
3946 
3947   if (!barray) {
3948     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3949     baij->barray = barray;
3950   }
3951 
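  /* stepval = number of scalars to skip between successive rows of one bs x bs
     block within v: for row-oriented input each logical row of v holds n*bs
     scalars, so after copying the bs entries of a block row we must skip the
     (n-1)*bs entries that belong to the other blocks; analogously, (m-1)*bs
     for column-oriented input */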
3952   if (roworiented) stepval = (n-1)*bs;
3953   else stepval = (m-1)*bs;
3954 
3955   for (i=0; i<m; i++) {
3956     if (im[i] < 0) continue;
3957 #if defined(PETSC_USE_DEBUG)
3958     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3959 #endif
3960     if (im[i] >= rstart && im[i] < rend) {
3961       row = im[i] - rstart;
3962       for (j=0; j<n; j++) {
3963         /* if only one block column (row-oriented) or one block row (column-oriented) is passed, v can be used in place and a copy is not required */
3964         if ((roworiented) && (n == 1)) {
3965           barray = (MatScalar*)v + i*bs2;
3966         } else if ((!roworiented) && (m == 1)) {
3967           barray = (MatScalar*)v + j*bs2;
3968         } else { /* Here a copy is required */
3969           if (roworiented) {
3970             value = v + i*(stepval+bs)*bs + j*bs;
3971           } else {
3972             value = v + j*(stepval+bs)*bs + i*bs;
3973           }
3974           for (ii=0; ii<bs; ii++,value+=stepval) {
3975             for (jj=0; jj<bs; jj++) {
3976               *barray++ = *value++;
3977             }
3978           }
3979           barray -= bs2; /* rewind barray to the start of the copied block */
3980         }
3981 
3982         if (in[j] >= cstart && in[j] < cend) {
3983           col  = in[j] - cstart;
3984           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3985         } else if (in[j] < 0) continue;
3986 #if defined(PETSC_USE_DEBUG)
3987         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3988 #endif
3989         else {
3990           if (mat->was_assembled) {
3991             if (!baij->colmap) {
3992               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3993             }
3994 
3995 #if defined(PETSC_USE_DEBUG)
3996 #if defined(PETSC_USE_CTABLE)
3997             { PetscInt data;
3998               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3999               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4000             }
4001 #else
4002             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4003 #endif
4004 #endif
4005 #if defined(PETSC_USE_CTABLE)
4006             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4007             col  = (col - 1)/bs;
4008 #else
4009             col = (baij->colmap[in[j]] - 1)/bs;
4010 #endif
4011             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4012               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4013               col  =  in[j];
4014             }
4015           } else col = in[j];
4016           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4017         }
4018       }
4019     } else {
4020       if (!baij->donotstash) {
4021         if (roworiented) {
4022           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4023         } else {
4024           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4025         }
4026       }
4027     }
4028   }
4029 
4030   /* task normally handled by MatSetValuesBlocked() */
4031   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4037 /*@
4038      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4039          in standard CSR format.
4040 
4041    Collective on MPI_Comm
4042 
4043    Input Parameters:
4044 +  comm - MPI communicator
4045 .  bs - the block size (currently only a block size of 1 is supported)
4046 .  m - number of local rows (Cannot be PETSC_DECIDE)
4047 .  n - number of local columns; this should be the same as the local size used in creating the
4048        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4049        calculated if N is given). For square matrices n is almost always m.
4050 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4051 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4052 .   i - row indices
4053 .   j - column indices
4054 -   a - matrix values
4055 
4056    Output Parameter:
4057 .   mat - the matrix
4058 
4059    Level: intermediate
4060 
4061    Notes:
4062        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4063      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4064      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid copying the arrays (note that routine creates a MATMPIAIJ, not a MATMPIBAIJ, matrix).
4065 
4066        The i and j indices are 0 based; i[r] gives the offset in j (and a) of the first entry of local row r, so i[0] = 0 and i[m] equals the number of local nonzeros.
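
   Example, a minimal sketch with bs = 1 on a single process (all values are illustrative):
.vb
   PetscInt    i[] = {0,2,3};       /* 2 local rows holding 3 nonzeros */
   PetscInt    j[] = {0,1,1};       /* column indices, row by row      */
   PetscScalar a[] = {1.0,2.0,3.0}; /* values in the same order as j   */
   Mat         A;
   ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve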
4067 
4068 .keywords: matrix, baij, compressed row, sparse, parallel
4069 
4070 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4071           MPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
4072 @*/
4073 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4074 {
4075   PetscErrorCode ierr;
4076 
4077   PetscFunctionBegin;
4078   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4079   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4080   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4081   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4082   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4083   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4084   PetscFunctionReturn(0);
4085 }
4086