1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 EXTERN_C_BEGIN
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 EXTERN_C_END
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
70 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
77   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 /*
83      Local utility routine that creates a mapping from the global column
84    number to the local number in the off-diagonal part of the local
85    storage of the matrix.  This is done in a non-scalable way since the
86    length of colmap equals the number of block columns in the global matrix (see the sketch after this routine).
87 */
88 #undef __FUNCT__
89 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
90 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
91 {
92   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
93   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
94   PetscErrorCode ierr;
95   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
96 
97   PetscFunctionBegin;
98 #if defined(PETSC_USE_CTABLE)
99   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) {
101     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
102   }
103 #else
104   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
105   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
106   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
107   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
108 #endif
109   PetscFunctionReturn(0);
110 }
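
/*
   A minimal usage sketch (not part of the library source): this is how the colmap
   built above is consulted when a global point column index gcol lands in the
   off-diagonal part B, mirroring the lookups in MatSetValues_MPIBAIJ() below.
   The names gcol, gbcol, and lcol are illustrative only.  Stored values are
   shifted by +1 so that 0 can mean "block column not present in B".

     PetscInt gbcol = gcol/bs;                      // global block column
     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gbcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                                        // point offset of the block, or -1 if absent
   #else
     lcol = baij->colmap[gbcol] - 1;                // point offset of the block, or -1 if absent
   #endif
     if (lcol >= 0) lcol += gcol % bs;              // local point column inside B
*/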
111 
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113   { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118     bcol = col/bs; \
119     ridx = row % bs; cidx = col % bs; \
120     low  = 0; high = nrow; \
121     while (high-low > 3) { \
122       t = (low+high)/2; \
123       if (rp[t] > bcol) high = t; \
124       else              low  = t; \
125     } \
126     for (_i=low; _i<high; _i++) { \
127       if (rp[_i] > bcol) break; \
128       if (rp[_i] == bcol) { \
129         bap = ap +  bs2*_i + bs*cidx + ridx; \
130         if (addv == ADD_VALUES) *bap += value;  \
131         else                    *bap  = value;  \
132         goto a_noinsert; \
133       } \
134     } \
135     if (a->nonew == 1) goto a_noinsert; \
136     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138     N = nrow++ - 1;  \
139     /* shift up all the later entries in this row */ \
140     for (ii=N; ii>=_i; ii--) { \
141       rp[ii+1] = rp[ii]; \
142       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143     } \
144     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145     rp[_i]                      = bcol;  \
146     ap[bs2*_i + bs*cidx + ridx] = value;  \
147 a_noinsert:; \
148     ailen[brow] = nrow; \
149   }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152   { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156     bcol = col/bs; \
157     ridx = row % bs; cidx = col % bs; \
158     low  = 0; high = nrow; \
159     while (high-low > 3) { \
160       t = (low+high)/2; \
161       if (rp[t] > bcol) high = t; \
162       else              low  = t; \
163     } \
164     for (_i=low; _i<high; _i++) { \
165       if (rp[_i] > bcol) break; \
166       if (rp[_i] == bcol) { \
167         bap = ap +  bs2*_i + bs*cidx + ridx; \
168         if (addv == ADD_VALUES) *bap += value;  \
169         else                    *bap  = value;  \
170         goto b_noinsert; \
171       } \
172     } \
173     if (b->nonew == 1) goto b_noinsert; \
174     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176     CHKMEMQ; \
177     N = nrow++ - 1;  \
178     /* shift up all the later entries in this row */ \
179     for (ii=N; ii>=_i; ii--) { \
180       rp[ii+1] = rp[ii]; \
181       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182     } \
183     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184     rp[_i]                      = bcol;  \
185     ap[bs2*_i + bs*cidx + ridx] = value;  \
186 b_noinsert:; \
187     bilen[brow] = nrow; \
188   }
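
/*
   Illustrative note (not used by the compiler): both macros above address a single
   scalar inside block number _i of a block row as

       ap[bs2*_i + bs*cidx + ridx]

   i.e. blocks are stored contiguously (bs2 = bs*bs scalars each) and each block is
   column-major, so cidx selects the column within the block and ridx the row.  For
   example, with bs = 2 the scalar at block-local row ridx = 1, block-local column
   cidx = 0 of the third block (_i = 2) lives at ap[4*2 + 2*0 + 1] = ap[9].
*/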
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscBool      roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap->rstart;
200   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
202 
203   /* Some variables required by the macros below */
204   Mat         A     = baij->A;
205   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar   *aa   =a->a;
208 
209   Mat         B     = baij->B;
210   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar   *ba   =b->a;
213 
214   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar *ap,*bap;
217 
218   PetscFunctionBegin;
219   if (v) PetscValidScalarPointer(v,6);
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224 #endif
225     if (im[i] >= rstart_orig && im[i] < rend_orig) {
226       row = im[i] - rstart_orig;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart_orig && in[j] < cend_orig) {
229           col = in[j] - cstart_orig;
230           if (roworiented) value = v[i*n+j];
231           else             value = v[i+j*m];
232           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         } else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_DEBUG)
236         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!baij->colmap) {
241               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined(PETSC_USE_CTABLE)
244             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
245             col  = col - 1;
246 #else
247             col = baij->colmap[in[j]/bs] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
250               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
251               col  =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
253               B    = baij->B;
254               b    = (Mat_SeqBAIJ*)(B)->data;
255               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
256               ba   =b->a;
257             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
258             else col += in[j]%bs;
259           } else col = in[j];
260           if (roworiented) value = v[i*n+j];
261           else             value = v[i+j*m];
262           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
263           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
264         }
265       }
266     } else {
267       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
268       if (!baij->donotstash) {
269         mat->assembled = PETSC_FALSE;
270         if (roworiented) {
271           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
272         } else {
273           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
274         }
275       }
276     }
277   }
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
283 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
284 {
285   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
286   const PetscScalar *value;
287   MatScalar         *barray     = baij->barray;
288   PetscBool         roworiented = baij->roworiented;
289   PetscErrorCode    ierr;
290   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
291   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
292   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
293 
294   PetscFunctionBegin;
295   if (!barray) {
296     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
297     baij->barray = barray;
298   }
299 
300   if (roworiented) stepval = (n-1)*bs;
301   else stepval = (m-1)*bs;
302 
303   for (i=0; i<m; i++) {
304     if (im[i] < 0) continue;
305 #if defined(PETSC_USE_DEBUG)
306     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
307 #endif
308     if (im[i] >= rstart && im[i] < rend) {
309       row = im[i] - rstart;
310       for (j=0; j<n; j++) {
311         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is already contiguous in v and a copy is not required */
312         if ((roworiented) && (n == 1)) {
313           barray = (MatScalar*)v + i*bs2;
314         } else if ((!roworiented) && (m == 1)) {
315           barray = (MatScalar*)v + j*bs2;
316         } else { /* Here a copy is required */
317           if (roworiented) {
318             value = v + (i*(stepval+bs) + j)*bs;
319           } else {
320             value = v + (j*(stepval+bs) + i)*bs;
321           }
322           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
323             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
324             barray += bs;
325           }
326           barray -= bs2;
327         }
328 
329         if (in[j] >= cstart && in[j] < cend) {
330           col  = in[j] - cstart;
331           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
332         } else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined(PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined(PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col  =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
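/*
   Illustrative note: HASH() is Knuth-style multiplicative hashing.  The key is
   multiplied by the fractional part of the golden ratio (HASH_KEY), the fractional
   part of the product is kept, and the result is scaled to the table size,

       h = floor(size * frac(key * 0.6180339887)).

   For example (made-up numbers), with size = 89 and key = 1000,
   1000*0.6180339887 = 618.0339887, the fractional part is 0.0339887, and
   89*0.0339887 is about 3.02, so probing starts at slot 3.
*/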
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up into the Hash Table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
427             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
435             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else if (!baij->donotstash) {
444       if (roworiented) {
445         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
446       } else {
447         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
448       }
449     }
450   }
451 #if defined(PETSC_USE_DEBUG)
452   baij->ht_total_ct  = total_ct;
453   baij->ht_insert_ct = insert_ct;
454 #endif
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
461 {
462   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
463   PetscBool         roworiented = baij->roworiented;
464   PetscErrorCode    ierr;
465   PetscInt          i,j,ii,jj,row,col;
466   PetscInt          rstart=baij->rstartbs;
467   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
468   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
469   PetscReal         tmp;
470   MatScalar         **HD = baij->hd,*baij_a;
471   const PetscScalar *v_t,*value;
472 #if defined(PETSC_USE_DEBUG)
473   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
474 #endif
475 
476   PetscFunctionBegin;
477   if (roworiented) stepval = (n-1)*bs;
478   else stepval = (m-1)*bs;
479 
480   for (i=0; i<m; i++) {
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
483     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
484 #endif
485     row = im[i];
486     v_t = v + i*nbs2;
487     if (row >= rstart && row < rend) {
488       for (j=0; j<n; j++) {
489         col = in[j];
490 
491         /* Look up into the Hash Table */
492         key = row*Nbs+col+1;
493         h1  = HASH(size,key,tmp);
494 
495         idx = h1;
496 #if defined(PETSC_USE_DEBUG)
497         total_ct++;
498         insert_ct++;
499         if (HT[idx] != key) {
500           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
501           if (idx == size) {
502             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
503             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
504           }
505         }
506 #else
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
511             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512           }
513         }
514 #endif
515         baij_a = HD[idx];
516         if (roworiented) {
517           /*value = v + i*(stepval+bs)*bs + j*bs;*/
518           /* value = v + (i*(stepval+bs)+j)*bs; */
519           value = v_t;
520           v_t  += bs;
521           if (addv == ADD_VALUES) {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] += *value++;
525               }
526             }
527           } else {
528             for (ii=0; ii<bs; ii++,value+=stepval) {
529               for (jj=ii; jj<bs2; jj+=bs) {
530                 baij_a[jj] = *value++;
531               }
532             }
533           }
534         } else {
535           value = v + j*(stepval+bs)*bs + i*bs;
536           if (addv == ADD_VALUES) {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] += *value++;
540               }
541             }
542           } else {
543             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
544               for (jj=0; jj<bs; jj++) {
545                 baij_a[jj] = *value++;
546               }
547             }
548           }
549         }
550       }
551     } else {
552       if (!baij->donotstash) {
553         if (roworiented) {
554           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
555         } else {
556           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
557         }
558       }
559     }
560   }
561 #if defined(PETSC_USE_DEBUG)
562   baij->ht_total_ct  = total_ct;
563   baij->ht_insert_ct = insert_ct;
564 #endif
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatGetValues_MPIBAIJ"
570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
571 {
572   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
573   PetscErrorCode ierr;
574   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
575   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
576 
577   PetscFunctionBegin;
578   for (i=0; i<m; i++) {
579     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
580     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
581     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
582       row = idxm[i] - bsrstart;
583       for (j=0; j<n; j++) {
584         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
585         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
586         if (idxn[j] >= bscstart && idxn[j] < bscend) {
587           col  = idxn[j] - bscstart;
588           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589         } else {
590           if (!baij->colmap) {
591             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
592           }
593 #if defined(PETSC_USE_CTABLE)
594           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
595           data--;
596 #else
597           data = baij->colmap[idxn[j]/bs]-1;
598 #endif
599           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
600           else {
601             col  = data + idxn[j]%bs;
602             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603           }
604         }
605       }
606     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatNorm_MPIBAIJ"
613 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
614 {
615   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
616   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
617   PetscErrorCode ierr;
618   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
619   PetscReal      sum = 0.0;
620   MatScalar      *v;
621 
622   PetscFunctionBegin;
623   if (baij->size == 1) {
624     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
625   } else {
626     if (type == NORM_FROBENIUS) {
627       v  = amat->a;
628       nz = amat->nz*bs2;
629       for (i=0; i<nz; i++) {
630         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
631       }
632       v  = bmat->a;
633       nz = bmat->nz*bs2;
634       for (i=0; i<nz; i++) {
635         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
636       }
637       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
638       *nrm = PetscSqrtReal(*nrm);
639     } else if (type == NORM_1) { /* max column sum */
640       PetscReal *tmp,*tmp2;
641       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
642       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
643       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
644       v    = amat->a; jj = amat->j;
645       for (i=0; i<amat->nz; i++) {
646         for (j=0; j<bs; j++) {
647           col = bs*(cstart + *jj) + j; /* column index */
648           for (row=0; row<bs; row++) {
649             tmp[col] += PetscAbsScalar(*v);  v++;
650           }
651         }
652         jj++;
653       }
654       v = bmat->a; jj = bmat->j;
655       for (i=0; i<bmat->nz; i++) {
656         for (j=0; j<bs; j++) {
657           col = bs*garray[*jj] + j;
658           for (row=0; row<bs; row++) {
659             tmp[col] += PetscAbsScalar(*v); v++;
660           }
661         }
662         jj++;
663       }
664       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
665       *nrm = 0.0;
666       for (j=0; j<mat->cmap->N; j++) {
667         if (tmp2[j] > *nrm) *nrm = tmp2[j];
668       }
669       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
670     } else if (type == NORM_INFINITY) { /* max row sum */
671       PetscReal *sums;
672       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
673       sum  = 0.0;
674       for (j=0; j<amat->mbs; j++) {
675         for (row=0; row<bs; row++) sums[row] = 0.0;
676         v  = amat->a + bs2*amat->i[j];
677         nz = amat->i[j+1]-amat->i[j];
678         for (i=0; i<nz; i++) {
679           for (col=0; col<bs; col++) {
680             for (row=0; row<bs; row++) {
681               sums[row] += PetscAbsScalar(*v); v++;
682             }
683           }
684         }
685         v  = bmat->a + bs2*bmat->i[j];
686         nz = bmat->i[j+1]-bmat->i[j];
687         for (i=0; i<nz; i++) {
688           for (col=0; col<bs; col++) {
689             for (row=0; row<bs; row++) {
690               sums[row] += PetscAbsScalar(*v); v++;
691             }
692           }
693         }
694         for (row=0; row<bs; row++) {
695           if (sums[row] > sum) sum = sums[row];
696         }
697       }
698       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
699       ierr = PetscFree(sums);CHKERRQ(ierr);
700     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 /*
706   Creates the hash table and fills it in.
707   This table is created only once.
708   If new entries need to be added to the matrix,
709   then the hash table has to be destroyed and
710   recreated. (A sketch of the corresponding lookup follows the routine below.)
711 */
712 #undef __FUNCT__
713 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
714 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
715 {
716   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
717   Mat            A     = baij->A,B=baij->B;
718   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
719   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
720   PetscErrorCode ierr;
721   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
722   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
723   PetscInt       *HT,key;
724   MatScalar      **HD;
725   PetscReal      tmp;
726 #if defined(PETSC_USE_INFO)
727   PetscInt ct=0,max=0;
728 #endif
729 
730   PetscFunctionBegin;
731   if (baij->ht) PetscFunctionReturn(0);
732 
733   baij->ht_size = (PetscInt)(factor*nz);
734   ht_size       = baij->ht_size;
735 
736   /* Allocate Memory for Hash Table */
737   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
738   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
739   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
740   HD   = baij->hd;
741   HT   = baij->ht;
742 
743   /* Loop Over A */
744   for (i=0; i<a->mbs; i++) {
745     for (j=ai[i]; j<ai[i+1]; j++) {
746       row = i+rstart;
747       col = aj[j]+cstart;
748 
749       key = row*Nbs + col + 1;
750       h1  = HASH(ht_size,key,tmp);
751       for (k=0; k<ht_size; k++) {
752         if (!HT[(h1+k)%ht_size]) {
753           HT[(h1+k)%ht_size] = key;
754           HD[(h1+k)%ht_size] = a->a + j*bs2;
755           break;
756 #if defined(PETSC_USE_INFO)
757         } else {
758           ct++;
759 #endif
760         }
761       }
762 #if defined(PETSC_USE_INFO)
763       if (k> max) max = k;
764 #endif
765     }
766   }
767   /* Loop Over B */
768   for (i=0; i<b->mbs; i++) {
769     for (j=bi[i]; j<bi[i+1]; j++) {
770       row = i+rstart;
771       col = garray[bj[j]];
772       key = row*Nbs + col + 1;
773       h1  = HASH(ht_size,key,tmp);
774       for (k=0; k<ht_size; k++) {
775         if (!HT[(h1+k)%ht_size]) {
776           HT[(h1+k)%ht_size] = key;
777           HD[(h1+k)%ht_size] = b->a + j*bs2;
778           break;
779 #if defined(PETSC_USE_INFO)
780         } else {
781           ct++;
782 #endif
783         }
784       }
785 #if defined(PETSC_USE_INFO)
786       if (k> max) max = k;
787 #endif
788     }
789   }
790 
791   /* Print Summary */
792 #if defined(PETSC_USE_INFO)
793   for (i=0,j=0; i<ht_size; i++) {
794     if (HT[i]) j++;
795   }
796   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
797 #endif
798   PetscFunctionReturn(0);
799 }
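
/*
   A minimal sketch (assuming the table built by the routine above) of the lookup
   that MatSetValues_MPIBAIJ_HT() performs for a point entry (row,col).  HT holds
   the 1-based keys and HD the addresses of the corresponding bs x bs blocks; the
   real routine raises an error when the key is not found.

     key = (row/bs)*Nbs + (col/bs) + 1;
     h1  = HASH(size,key,tmp);
     idx = h1;
     if (HT[idx] != key) {                                  // linear probing with wraparound
       for (idx=h1; idx<size && HT[idx]!=key; idx++) ;
       if (idx == size) for (idx=0; idx<h1 && HT[idx]!=key; idx++) ;
     }
     *(HD[idx] + (col % bs)*bs + (row % bs)) = value;       // column-major within the block
*/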
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
803 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
804 {
805   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
806   PetscErrorCode ierr;
807   PetscInt       nstash,reallocs;
808   InsertMode     addv;
809 
810   PetscFunctionBegin;
811   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
812 
813   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
814   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
815   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
822   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
823   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
829 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
830 {
831   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
832   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
833   PetscErrorCode ierr;
834   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
835   PetscInt       *row,*col;
836   PetscBool      r1,r2,r3,other_disassembled;
837   MatScalar      *val;
838   InsertMode     addv = mat->insertmode;
839   PetscMPIInt    n;
840 
841   PetscFunctionBegin;
842   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
843   if (!baij->donotstash && !mat->nooffprocentries) {
844     while (1) {
845       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
846       if (!flg) break;
847 
848       for (i=0; i<n;) {
849         /* Now identify the consecutive vals belonging to the same row */
850         for (j=i,rstart=row[j]; j<n; j++) {
851           if (row[j] != rstart) break;
852         }
853         if (j < n) ncols = j-i;
854         else       ncols = n-i;
855         /* Now assemble all these values with a single function call */
856         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
857         i    = j;
858       }
859     }
860     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
861     /* Now process the block-stash. Since the values are stashed column-oriented,
862        clear the roworiented flags (i.e. treat the stashed values as column-oriented), and
863        restore the original flags after the MatSetValuesBlocked() calls */
864     r1 = baij->roworiented;
865     r2 = a->roworiented;
866     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
867 
868     baij->roworiented = PETSC_FALSE;
869     a->roworiented    = PETSC_FALSE;
870 
871     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
872     while (1) {
873       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
874       if (!flg) break;
875 
876       for (i=0; i<n;) {
877         /* Now identify the consecutive vals belonging to the same row */
878         for (j=i,rstart=row[j]; j<n; j++) {
879           if (row[j] != rstart) break;
880         }
881         if (j < n) ncols = j-i;
882         else       ncols = n-i;
883         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
884         i    = j;
885       }
886     }
887     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
888 
889     baij->roworiented = r1;
890     a->roworiented    = r2;
891 
892     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
893   }
894 
895   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
896   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
897 
898   /* determine if any processor has disassembled; if so, we must
899      also disassemble ourselves so that we may reassemble. */
900   /*
901      if the nonzero structure of the submatrix B cannot change then we know that
902      no processor disassembled, so we can skip this step
903   */
904   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
905     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
906     if (mat->was_assembled && !other_disassembled) {
907       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
908     }
909   }
910 
911   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
912     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
913   }
914   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
915   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
916   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
917 
918 #if defined(PETSC_USE_INFO)
919   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
920     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
921 
922     baij->ht_total_ct  = 0;
923     baij->ht_insert_ct = 0;
924   }
925 #endif
926   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
928 
929     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
930     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
931   }
932 
933   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
934 
935   baij->rowvalues = 0;
936   PetscFunctionReturn(0);
937 }
938 
939 #include <petscdraw.h>
940 #undef __FUNCT__
941 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
942 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
943 {
944   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
945   PetscErrorCode    ierr;
946   PetscMPIInt       size = baij->size,rank = baij->rank;
947   PetscInt          bs   = mat->rmap->bs;
948   PetscBool         iascii,isdraw;
949   PetscViewer       sviewer;
950   PetscViewerFormat format;
951 
952   PetscFunctionBegin;
953   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
954   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
955   if (iascii) {
956     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
957     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
958       MatInfo info;
959       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
960       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
962       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
963                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
968       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
969       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
970       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
971       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
972       PetscFunctionReturn(0);
973     } else if (format == PETSC_VIEWER_ASCII_INFO) {
974       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
975       PetscFunctionReturn(0);
976     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
977       PetscFunctionReturn(0);
978     }
979   }
980 
981   if (isdraw) {
982     PetscDraw draw;
983     PetscBool isnull;
984     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
985     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
986   }
987 
988   if (size == 1) {
989     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
990     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
991   } else {
992     /* assemble the entire matrix onto first processor. */
993     Mat         A;
994     Mat_SeqBAIJ *Aloc;
995     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
996     MatScalar   *a;
997 
998     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
999     /* Perhaps this should be the type of mat? */
1000     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1001     if (!rank) {
1002       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1003     } else {
1004       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1005     }
1006     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1007     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1008     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1009     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1010 
1011     /* copy over the A part */
1012     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1013     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1014     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1015 
1016     for (i=0; i<mbs; i++) {
1017       rvals[0] = bs*(baij->rstartbs + i);
1018       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1019       for (j=ai[i]; j<ai[i+1]; j++) {
1020         col = (baij->cstartbs+aj[j])*bs;
1021         for (k=0; k<bs; k++) {
1022           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1023           col++; a += bs;
1024         }
1025       }
1026     }
1027     /* copy over the B part */
1028     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1029     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1030     for (i=0; i<mbs; i++) {
1031       rvals[0] = bs*(baij->rstartbs + i);
1032       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1033       for (j=ai[i]; j<ai[i+1]; j++) {
1034         col = baij->garray[aj[j]]*bs;
1035         for (k=0; k<bs; k++) {
1036           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1037           col++; a += bs;
1038         }
1039       }
1040     }
1041     ierr = PetscFree(rvals);CHKERRQ(ierr);
1042     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1044     /*
1045        Every process has to make this call to draw the matrix since the graphics waits are
1046        synchronized across all processors that share the PetscDraw object
1047     */
1048     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1049     if (!rank) {
1050       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1051       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1052       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1053       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1054     }
1055     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1056     ierr = MatDestroy(&A);CHKERRQ(ierr);
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1063 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1064 {
1065   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1066   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1067   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1070   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1071   int            fd;
1072   PetscScalar    *column_values;
1073   FILE           *file;
1074   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1075   PetscInt       message_count,flowcontrolcount;
1076 
1077   PetscFunctionBegin;
1078   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1079   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1080   nz   = bs2*(A->nz + B->nz);
1081   rlen = mat->rmap->n;
1082   if (!rank) {
1083     header[0] = MAT_FILE_CLASSID;
1084     header[1] = mat->rmap->N;
1085     header[2] = mat->cmap->N;
1086 
1087     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1088     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1089     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1090     /* get largest number of rows any processor has */
1091     range = mat->rmap->range;
1092     for (i=1; i<size; i++) {
1093       rlen = PetscMax(rlen,range[i+1] - range[i]);
1094     }
1095   } else {
1096     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1097   }
1098 
1099   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1100   /* compute the number of block nonzeros in each block row */
1101   for (i=0; i<a->mbs; i++) {
1102     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1103   }
1104   /* store the row lengths to the file */
1105   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1106   if (!rank) {
1107     MPI_Status status;
1108     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1109     rlen = (range[1] - range[0])/bs;
1110     for (i=0; i<rlen; i++) {
1111       for (j=0; j<bs; j++) {
1112         row_lens[i*bs+j] = bs*crow_lens[i];
1113       }
1114     }
1115     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1116     for (i=1; i<size; i++) {
1117       rlen = (range[i+1] - range[i])/bs;
1118       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1119       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1120       for (k=0; k<rlen; k++) {
1121         for (j=0; j<bs; j++) {
1122           row_lens[k*bs+j] = bs*crow_lens[k];
1123         }
1124       }
1125       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1126     }
1127     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1128     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1129   } else {
1130     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1131     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1132     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1133   }
1134   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1135 
1136   /* load up the local column indices. Store them for every point row, not just once per block row, since process 0 does not have the
1137      information needed to expand a block row into its point rows. This requires more communication, but still no more than the
1138      communication needed for the nonzero values (a small expansion sketch follows) */
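/*
     Illustrative example (made-up data): with bs = 2, cstart = 2, and a block row
     holding one off-diagonal block at global block column 0 plus one diagonal-part
     block at local block column 1 (global block column 3), the loop below emits the
     point column indices

         0 1 6 7

     for the first point row of the block row, and then replicates that list bs-1
     more times so that each of the bs point rows carries its own copy.
*/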
1139   nzmax = nz; /* space the largest processor needs */
1140   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1141   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1142   cnt   = 0;
1143   for (i=0; i<a->mbs; i++) {
1144     pcnt = cnt;
1145     for (j=B->i[i]; j<B->i[i+1]; j++) {
1146       if ((col = garray[B->j[j]]) > cstart) break;
1147       for (l=0; l<bs; l++) {
1148         column_indices[cnt++] = bs*col+l;
1149       }
1150     }
1151     for (k=A->i[i]; k<A->i[i+1]; k++) {
1152       for (l=0; l<bs; l++) {
1153         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1154       }
1155     }
1156     for (; j<B->i[i+1]; j++) {
1157       for (l=0; l<bs; l++) {
1158         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1159       }
1160     }
1161     len = cnt - pcnt;
1162     for (k=1; k<bs; k++) {
1163       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1164       cnt += len;
1165     }
1166   }
1167   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1168 
1169   /* store the columns to the file */
1170   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1171   if (!rank) {
1172     MPI_Status status;
1173     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     for (i=1; i<size; i++) {
1175       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1176       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1177       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1178       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1179     }
1180     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1181   } else {
1182     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1183     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1185     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1186   }
1187   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1188 
1189   /* load up the numerical values */
1190   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1191   cnt  = 0;
1192   for (i=0; i<a->mbs; i++) {
1193     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1194     for (j=B->i[i]; j<B->i[i+1]; j++) {
1195       if (garray[B->j[j]] > cstart) break;
1196       for (l=0; l<bs; l++) {
1197         for (ll=0; ll<bs; ll++) {
1198           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1199         }
1200       }
1201       cnt += bs;
1202     }
1203     for (k=A->i[i]; k<A->i[i+1]; k++) {
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     for (; j<B->i[i+1]; j++) {
1212       for (l=0; l<bs; l++) {
1213         for (ll=0; ll<bs; ll++) {
1214           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1215         }
1216       }
1217       cnt += bs;
1218     }
1219     cnt += (bs-1)*rlen;
1220   }
1221   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1222 
1223   /* store the column values to the file */
1224   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1225   if (!rank) {
1226     MPI_Status status;
1227     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1228     for (i=1; i<size; i++) {
1229       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1230       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1231       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_values);CHKERRQ(ierr);
1242 
1243   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1244   if (file) {
1245     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatView_MPIBAIJ"
1252 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1253 {
1254   PetscErrorCode ierr;
1255   PetscBool      iascii,isdraw,issocket,isbinary;
1256 
1257   PetscFunctionBegin;
1258   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1259   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1260   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1261   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1262   if (iascii || isdraw || issocket) {
1263     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1264   } else if (isbinary) {
1265     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1272 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1273 {
1274   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278 #if defined(PETSC_USE_LOG)
1279   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1280 #endif
1281   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1282   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1283   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1284   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1285 #if defined(PETSC_USE_CTABLE)
1286   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1287 #else
1288   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1289 #endif
1290   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1291   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1292   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1293   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1294   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1295   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1296   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1297   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1298 
1299   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatMult_MPIBAIJ"
1314 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1315 {
1316   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1317   PetscErrorCode ierr;
1318   PetscInt       nt;
1319 
1320   PetscFunctionBegin;
1321   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1322   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1323   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1324   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
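  /*
     Compute yy = A*xx for the parallel matrix as
         yy = A_d*xx_local + B_o*xx_ghost,
     where A_d is the local diagonal block and B_o the off-diagonal block.
     The scatter that gathers the ghost entries of xx into lvec is overlapped
     with the diagonal-block multiply.
  */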
1325   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1327   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1334 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1335 {
1336   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1341   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1342   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1343   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1349 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1350 {
1351   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1352   PetscErrorCode ierr;
1353   PetscBool      merged;
1354 
1355   PetscFunctionBegin;
1356   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
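  /*
     Compute yy = A^T*xx as yy = A_d^T*xx plus the reverse-mode scatter-add of
     B_o^T*xx from lvec into yy; the relative order of the local multiply and
     the scatter calls depends on whether this VecScatter has merged
     Begin()/End() phases.
  */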
1357   /* do nondiagonal part */
1358   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1359   if (!merged) {
1360     /* send it on its way */
1361     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1362     /* do local part */
1363     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1364     /* receive remote parts: note this assumes the values are not actually */
1365     /* inserted in yy until the next line */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   } else {
1368     /* do local part */
1369     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1370     /* send it on its way */
1371     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1372     /* values actually were received in the Begin() but we need to call this nop */
1373     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1380 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1381 {
1382   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1383   PetscErrorCode ierr;
1384 
1385   PetscFunctionBegin;
1386   /* do nondiagonal part */
1387   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1388   /* send it on its way */
1389   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1390   /* do local part */
1391   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1392   /* receive remote parts: note this assumes the values are not actually */
1393   /* inserted into zz until the VecScatterEnd() below, which holds for the */
1394   /* current implementation but may not always be true. */
1395   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 /*
1400   This only works correctly for square matrices where the subblock A->A is the
1401    diagonal block
1402 */
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1405 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1406 {
1407   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
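  /* with the standard square layout every diagonal entry of the locally owned
     rows lies in the diagonal block a->A, so its diagonal is the local result */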
1412   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatScale_MPIBAIJ"
1418 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1419 {
1420   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1425   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1431 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1432 {
1433   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1434   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1435   PetscErrorCode ierr;
1436   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1437   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1438   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1439 
1440   PetscFunctionBegin;
1441   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1442   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1443   mat->getrowactive = PETSC_TRUE;
1444 
1445   if (!mat->rowvalues && (idx || v)) {
1446     /*
1447         allocate enough space to hold information from the longest row.
1448     */
1449     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1450     PetscInt    max = 1,mbs = mat->mbs,tmp;
1451     for (i=0; i<mbs; i++) {
1452       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1453       if (max < tmp) max = tmp;
1454     }
1455     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1456   }
1457   lrow = row - brstart;
1458 
1459   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1460   if (!v)   {pvA = 0; pvB = 0;}
1461   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1462   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1463   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1464   nztot = nzA + nzB;
1465 
1466   cmap = mat->garray;
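  /*
     Merge the two local rows into one row with globally increasing column
     numbers: garray (cmap) maps the compressed block columns of B back to
     global block columns, so the B entries whose global block column is below
     cstart come first, then all A (diagonal block) entries, then the remaining
     B entries.
  */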
1467   if (v  || idx) {
1468     if (nztot) {
1469       /* Sort by increasing column numbers, assuming A and B already sorted */
1470       PetscInt imark = -1;
1471       if (v) {
1472         *v = v_p = mat->rowvalues;
1473         for (i=0; i<nzB; i++) {
1474           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1475           else break;
1476         }
1477         imark = i;
1478         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1479         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1480       }
1481       if (idx) {
1482         *idx = idx_p = mat->rowindices;
1483         if (imark > -1) {
1484           for (i=0; i<imark; i++) {
1485             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1486           }
1487         } else {
1488           for (i=0; i<nzB; i++) {
1489             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1490             else break;
1491           }
1492           imark = i;
1493         }
1494         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1495         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1496       }
1497     } else {
1498       if (idx) *idx = 0;
1499       if (v)   *v   = 0;
1500     }
1501   }
1502   *nz  = nztot;
1503   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1504   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1510 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1511 {
1512   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1513 
1514   PetscFunctionBegin;
1515   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1516   baij->getrowactive = PETSC_FALSE;
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1522 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1523 {
1524   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1529   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1535 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1536 {
1537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1538   Mat            A  = a->A,B = a->B;
1539   PetscErrorCode ierr;
1540   PetscReal      isend[5],irecv[5];
1541 
1542   PetscFunctionBegin;
1543   info->block_size = (PetscReal)matin->rmap->bs;
1544 
1545   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1546 
1547   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1548   isend[3] = info->memory;  isend[4] = info->mallocs;
1549 
1550   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1551 
1552   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1553   isend[3] += info->memory;  isend[4] += info->mallocs;
1554 
1555   if (flag == MAT_LOCAL) {
1556     info->nz_used      = isend[0];
1557     info->nz_allocated = isend[1];
1558     info->nz_unneeded  = isend[2];
1559     info->memory       = isend[3];
1560     info->mallocs      = isend[4];
1561   } else if (flag == MAT_GLOBAL_MAX) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else if (flag == MAT_GLOBAL_SUM) {
1570     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1571 
1572     info->nz_used      = irecv[0];
1573     info->nz_allocated = irecv[1];
1574     info->nz_unneeded  = irecv[2];
1575     info->memory       = irecv[3];
1576     info->mallocs      = irecv[4];
1577   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1578   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1579   info->fill_ratio_needed = 0;
1580   info->factor_mallocs    = 0;
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1586 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1587 {
1588   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   switch (op) {
1593   case MAT_NEW_NONZERO_LOCATIONS:
1594   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1595   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1596   case MAT_KEEP_NONZERO_PATTERN:
1597   case MAT_NEW_NONZERO_LOCATION_ERR:
1598     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1599     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1600     break;
1601   case MAT_ROW_ORIENTED:
1602     a->roworiented = flg;
1603 
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_NEW_DIAGONALS:
1608     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1609     break;
1610   case MAT_IGNORE_OFF_PROC_ENTRIES:
1611     a->donotstash = flg;
1612     break;
1613   case MAT_USE_HASH_TABLE:
1614     a->ht_flag = flg;
1615     break;
1616   case MAT_SYMMETRIC:
1617   case MAT_STRUCTURALLY_SYMMETRIC:
1618   case MAT_HERMITIAN:
1619   case MAT_SYMMETRY_ETERNAL:
1620     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1621     break;
1622   default:
1623     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1633   Mat_SeqBAIJ    *Aloc;
1634   Mat            B;
1635   PetscErrorCode ierr;
1636   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1637   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1638   MatScalar      *a;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1643     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1644     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1645     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1646     /* Do not know preallocation information, but must set block size */
1647     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1648   } else {
1649     B = *matout;
1650   }
1651 
1652   /* copy over the A part */
1653   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1654   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1655   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1656 
1657   for (i=0; i<mbs; i++) {
1658     rvals[0] = bs*(baij->rstartbs + i);
1659     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1660     for (j=ai[i]; j<ai[i+1]; j++) {
1661       col = (baij->cstartbs+aj[j])*bs;
1662       for (k=0; k<bs; k++) {
1663         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1664 
1665         col++; a += bs;
1666       }
1667     }
1668   }
1669   /* copy over the B part */
1670   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1671   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1672   for (i=0; i<mbs; i++) {
1673     rvals[0] = bs*(baij->rstartbs + i);
1674     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1675     for (j=ai[i]; j<ai[i+1]; j++) {
1676       col = baij->garray[aj[j]]*bs;
1677       for (k=0; k<bs; k++) {
1678         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1679         col++;
1680         a += bs;
1681       }
1682     }
1683   }
1684   ierr = PetscFree(rvals);CHKERRQ(ierr);
1685   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687 
1688   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1689   else {
1690     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1698 {
1699   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1700   Mat            a     = baij->A,b = baij->B;
1701   PetscErrorCode ierr;
1702   PetscInt       s1,s2,s3;
1703 
1704   PetscFunctionBegin;
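  /*
     Form diag(ll)*mat*diag(rr): the left scaling applies directly to both the
     diagonal and off-diagonal blocks, while right-scaling the off-diagonal
     block requires the ghost entries of rr, which are scattered into lvec
     while the diagonal block is being scaled.
  */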
1705   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1706   if (rr) {
1707     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1708     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1709     /* Overlap communication with computation. */
1710     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   }
1712   if (ll) {
1713     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1714     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1715     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1716   }
1717   /* scale  the diagonal block */
1718   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1719 
1720   if (rr) {
1721     /* Do a scatter end and then right scale the off-diagonal block */
1722     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1723     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1730 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1731 {
1732   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1733   PetscErrorCode    ierr;
1734   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1735   PetscInt          i,*owners = A->rmap->range;
1736   PetscInt          *nprocs,j,idx,nsends,row;
1737   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1738   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1739   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1740   MPI_Comm          comm;
1741   MPI_Request       *send_waits,*recv_waits;
1742   MPI_Status        recv_status,*send_status;
1743   const PetscScalar *xx;
1744   PetscScalar       *bb;
1745 #if defined(PETSC_DEBUG)
1746   PetscBool         found = PETSC_FALSE;
1747 #endif
1748 
1749   PetscFunctionBegin;
1750   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
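  /*
     Rows to be zeroed may be listed on any process.  Each process determines
     the owner of every row it lists, the row indices are exchanged with the
     owners (message counts/lengths from PetscMaxSum(), then Isend/Irecv), and
     each owner zeroes its local rows, optionally placing diag on the diagonal
     and setting b[row] = diag*x[row] for those rows.
  */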
1751   /*  first count number of contributors to each processor */
1752   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1753   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1755   j    = 0;
1756   for (i=0; i<N; i++) {
1757     if (lastidx > (idx = rows[i])) j = 0;
1758     lastidx = idx;
1759     for (; j<size; j++) {
1760       if (idx >= owners[j] && idx < owners[j+1]) {
1761         nprocs[2*j]++;
1762         nprocs[2*j+1] = 1;
1763         owner[i]      = j;
1764 #if defined(PETSC_DEBUG)
1765         found = PETSC_TRUE;
1766 #endif
1767         break;
1768       }
1769     }
1770 #if defined(PETSC_DEBUG)
1771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1772     found = PETSC_FALSE;
1773 #endif
1774   }
1775   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1776 
1777   if (A->nooffproczerorows) {
1778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1779     nrecvs = nsends;
1780     nmax   = N;
1781   } else {
1782     /* inform other processors of number of messages and max length*/
1783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1784   }
1785 
1786   /* post receives:   */
1787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1789   for (i=0; i<nrecvs; i++) {
1790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1791   }
1792 
1793   /* do sends:
1794      1) starts[i] gives the starting index in svalues for stuff going to
1795      the ith processor
1796   */
1797   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1798   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1799   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1800   starts[0] = 0;
1801   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1802   for (i=0; i<N; i++) {
1803     svalues[starts[owner[i]]++] = rows[i];
1804   }
1805 
1806   starts[0] = 0;
1807   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1808   count = 0;
1809   for (i=0; i<size; i++) {
1810     if (nprocs[2*i+1]) {
1811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1812     }
1813   }
1814   ierr = PetscFree(starts);CHKERRQ(ierr);
1815 
1816   base = owners[rank];
1817 
1818   /*  wait on receives */
1819   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1820   count = nrecvs;
1821   slen  = 0;
1822   while (count) {
1823     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1824     /* unpack receives into our local space */
1825     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1826 
1827     source[imdex] = recv_status.MPI_SOURCE;
1828     lens[imdex]   = n;
1829     slen         += n;
1830     count--;
1831   }
1832   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1833 
1834   /* move the data into the send scatter */
1835   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1836   count = 0;
1837   for (i=0; i<nrecvs; i++) {
1838     values = rvalues + i*nmax;
1839     for (j=0; j<lens[i]; j++) {
1840       lrows[count++] = values[j] - base;
1841     }
1842   }
1843   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1844   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1845   ierr = PetscFree(owner);CHKERRQ(ierr);
1846   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1847 
1848   /* fix right hand side if needed */
1849   if (x && b) {
1850     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1851     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1852     for (i=0; i<slen; i++) {
1853       bb[lrows[i]] = diag*xx[lrows[i]];
1854     }
1855     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1856     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1857   }
1858 
1859   /* actually zap the local rows */
1860   /*
1861         Zero the required rows. If the "diagonal block" of the matrix
1862      is square and the user wishes to set the diagonal, we use separate
1863      code so that MatSetValues() is not called once per diagonal entry,
1864      which would allocate new memory repeatedly and slow things down.
1865 
1866   */
1867   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1868   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1869   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1870     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1871   } else if (diag != 0.0) {
1872     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1874        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1875     for (i=0; i<slen; i++) {
1876       row  = lrows[i] + rstart_bs;
1877       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1878     }
1879     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881   } else {
1882     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1883   }
1884 
1885   ierr = PetscFree(lrows);CHKERRQ(ierr);
1886 
1887   /* wait on sends */
1888   if (nsends) {
1889     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1890     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1891     ierr = PetscFree(send_status);CHKERRQ(ierr);
1892   }
1893   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1894   ierr = PetscFree(svalues);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1901 {
1902   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatEqual_MPIBAIJ"
1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1915 {
1916   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1917   Mat            a,b,c,d;
1918   PetscBool      flg;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   a = matA->A; b = matA->B;
1923   c = matB->A; d = matB->B;
1924 
1925   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1926   if (flg) {
1927     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1928   }
1929   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatCopy_MPIBAIJ"
1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1936 {
1937   PetscErrorCode ierr;
1938   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1939   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1940 
1941   PetscFunctionBegin;
1942   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1943   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1944     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1945   } else {
1946     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1947     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1954 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1955 {
1956   PetscErrorCode ierr;
1957 
1958   PetscFunctionBegin;
1959   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1966 {
1967   PetscErrorCode ierr;
1968   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1969   PetscBLASInt   bnz,one=1;
1970   Mat_SeqBAIJ    *x,*y;
1971 
1972   PetscFunctionBegin;
1973   if (str == SAME_NONZERO_PATTERN) {
1974     PetscScalar alpha = a;
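    /* X and Y share the same nonzero pattern, so the stored values of their
       diagonal and off-diagonal blocks line up one-to-one and Y += a*X
       reduces to two BLAS axpy calls over the value arrays */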
1975     x    = (Mat_SeqBAIJ*)xx->A->data;
1976     y    = (Mat_SeqBAIJ*)yy->A->data;
1977     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1978     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1979     x    = (Mat_SeqBAIJ*)xx->B->data;
1980     y    = (Mat_SeqBAIJ*)yy->B->data;
1981     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1982     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1983   } else {
1984     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1985   }
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1991 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1992 {
1993   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1998   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2004 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2005 {
2006   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2011   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2017 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2018 {
2019   PetscErrorCode ierr;
2020   IS             iscol_local;
2021   PetscInt       csize;
2022 
2023   PetscFunctionBegin;
2024   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2025   if (call == MAT_REUSE_MATRIX) {
2026     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2027     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2028   } else {
2029     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2030   }
2031   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2032   if (call == MAT_INITIAL_MATRIX) {
2033     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2034     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2035   }
2036   PetscFunctionReturn(0);
2037 }
2038 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2041 /*
2042   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2043   on each process, and then the final result by concatenating the local
2044   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2045 */
2046 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2047 {
2048   PetscErrorCode ierr;
2049   PetscMPIInt    rank,size;
2050   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2051   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2052   Mat            M,Mreuse;
2053   MatScalar      *vwork,*aa;
2054   MPI_Comm       comm;
2055   IS             isrow_new, iscol_new;
2056   PetscBool      idflag,allrows, allcols;
2057   Mat_SeqBAIJ    *aij;
2058 
2059   PetscFunctionBegin;
2060   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2063   /* The index compression and expansion should be avoided. It does not point
2064      out errors and might change the indices, hence it is potentially buggy */
2065   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2066   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2067 
2068   /* Check for special case: each processor gets entire matrix columns */
2069   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2070   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2071   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2072   else allcols = PETSC_FALSE;
2073 
2074   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2075   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2076   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2077   else allrows = PETSC_FALSE;
2078 
2079   if (call ==  MAT_REUSE_MATRIX) {
2080     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2081     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2082     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2083   } else {
2084     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2085   }
2086   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2087   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2088   /*
2089       m - number of local rows
2090       n - number of columns (same on all processors)
2091       rstart - first row in new global matrix generated
2092   */
2093   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2094   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2095   m    = m/bs;
2096   n    = n/bs;
2097 
2098   if (call == MAT_INITIAL_MATRIX) {
2099     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2100     ii  = aij->i;
2101     jj  = aij->j;
2102 
2103     /*
2104         Determine the number of non-zeros in the diagonal and off-diagonal
2105         portions of the matrix in order to do correct preallocation
2106     */
2107 
2108     /* first get start and end of "diagonal" columns */
2109     if (csize == PETSC_DECIDE) {
2110       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2111       if (mglobal == n*bs) { /* square matrix */
2112         nlocal = m;
2113       } else {
2114         nlocal = n/size + ((n % size) > rank);
2115       }
2116     } else {
2117       nlocal = csize/bs;
2118     }
2119     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2120     rstart = rend - nlocal;
2121     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2122 
2123     /* next, compute all the lengths */
2124     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2125     olens = dlens + m;
2126     for (i=0; i<m; i++) {
2127       jend = ii[i+1] - ii[i];
2128       olen = 0;
2129       dlen = 0;
2130       for (j=0; j<jend; j++) {
2131         if (*jj < rstart || *jj >= rend) olen++;
2132         else dlen++;
2133         jj++;
2134       }
2135       olens[i] = olen;
2136       dlens[i] = dlen;
2137     }
2138     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2139     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2140     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2141     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2142     ierr = PetscFree(dlens);CHKERRQ(ierr);
2143   } else {
2144     PetscInt ml,nl;
2145 
2146     M    = *newmat;
2147     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2148     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2149     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2150     /*
2151          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2152        rather than the slower MatSetValuesBlocked().
2153     */
2154     M->was_assembled = PETSC_TRUE;
2155     M->assembled     = PETSC_FALSE;
2156   }
2157   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2158   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2159   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2160   ii   = aij->i;
2161   jj   = aij->j;
2162   aa   = aij->a;
2163   for (i=0; i<m; i++) {
2164     row   = rstart/bs + i;
2165     nz    = ii[i+1] - ii[i];
2166     cwork = jj;     jj += nz;
2167     vwork = aa;     aa += nz*bs*bs;
2168     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2169   }
2170 
2171   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2173   *newmat = M;
2174 
2175   /* save submatrix used in processor for next request */
2176   if (call ==  MAT_INITIAL_MATRIX) {
2177     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2178     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2179   }
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 #undef __FUNCT__
2184 #define __FUNCT__ "MatPermute_MPIBAIJ"
2185 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2186 {
2187   MPI_Comm       comm,pcomm;
2188   PetscInt       first,rlocal_size,clocal_size,nrows;
2189   const PetscInt *rows;
2190   PetscMPIInt    size;
2191   IS             crowp,growp,irowp,lrowp,lcolp;
2192   PetscErrorCode ierr;
2193 
2194   PetscFunctionBegin;
2195   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
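  /*
     Strategy: gather the row permutation onto every process, invert it, and
     take the locally owned slice as the target row IS; gather the column
     permutation as well, then obtain the permuted matrix by extracting the
     corresponding submatrix with MatGetSubMatrix_MPIBAIJ_Private().
  */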
2196   /* make a collective version of 'rowp' */
2197   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2198   if (pcomm==comm) {
2199     crowp = rowp;
2200   } else {
2201     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2202     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2203     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2204     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2205   }
2206   /* collect the global row permutation and invert it */
2207   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2208   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2209   if (pcomm!=comm) {
2210     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2211   }
2212   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2213   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2214   /* get the local target indices */
2215   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2216   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2217   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2218   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2219   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2220   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2221   /* the column permutation is much simpler;
2222      just gather a local version of 'colp' */
2223   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2224   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2225   if (size==1) {
2226     lcolp = colp;
2227   } else {
2228     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2229   }
2230   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2231   /* now we just get the submatrix */
2232   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2233   if (size>1) {
2234     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2235   }
2236   /* clean up */
2237   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2243 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2244 {
2245   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2246   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2247 
2248   PetscFunctionBegin;
2249   if (nghosts) *nghosts = B->nbs;
2250   if (ghosts) *ghosts = baij->garray;
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2258 /*
2259     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2260 */
2261 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2262 {
2263   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2264   PetscErrorCode         ierr;
2265   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2266   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2267   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2268   PetscInt               nis = iscoloring->n,nctot,*cols;
2269   PetscInt               *rowhit,M,cstart,cend,colb;
2270   PetscInt               *columnsforrow,l;
2271   IS                     *isa;
2272   PetscBool              done,flg;
2273   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2274   PetscInt               ctype=c->ctype;
2275 
2276   PetscFunctionBegin;
2277   if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2278   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2279 
2280   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2281   else     ltog = NULL;
2282   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2283   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2284 
2285   M         = mat->rmap->n/bs;
2286   cstart    = mat->cmap->rstart/bs;
2287   cend      = mat->cmap->rend/bs;
2288   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2289   c->N      = mat->cmap->N/bs;
2290   c->m      = mat->rmap->n/bs;
2291   c->rstart = mat->rmap->rstart/bs;
2292 
2293   c->ncolors = nis;
2294   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2295   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2296   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2297   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2298   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2299   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2300 
2301   /* Allow access to data structures of local part of matrix */
2302   if (!baij->colmap) {
2303     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2304   }
2305   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2306   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2307 
2308   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2309   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2310 
2311   for (i=0; i<nis; i++) {
2312     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2313     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2314 
2315     c->ncolumns[i] = n;
2316     if (n) {
2317       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2318       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2319       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2320     } else {
2321       c->columns[i] = 0;
2322     }
2323 
2324     if (ctype == IS_COLORING_GLOBAL) {
2325       /* Determine the total (parallel) number of columns of this color */
2326       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
2327       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2328 
2329       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2330       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2331       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2332       if (!nctot) {
2333         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2334       }
2335 
2336       disp[0] = 0;
2337       for (j=1; j<size; j++) {
2338         disp[j] = disp[j-1] + ncolsonproc[j-1];
2339       }
2340 
2341       /* Get complete list of columns for color on each processor */
2342       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2343       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2344       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2345     } else if (ctype == IS_COLORING_GHOSTED) {
2346       /* Determine local number of columns of this color on this process, including ghost points */
2347       nctot = n;
2348       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2349       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2350     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2351 
2352     /*
2353        Mark all rows affected by these columns
2354     */
2355     /* Temporary option to allow for debugging/testing */
2356     flg  = PETSC_FALSE;
2357     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2358     if (!flg) { /*-----------------------------------------------------------------------------*/
2359       /* crude, fast version */
2360       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
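      /* rowhit[row] is set to col+1 for every local block row that has a
         nonzero in one of the columns of this color (0 means not hit), so a
         single sweep over the rows below recovers both the hit rows and the
         column responsible for each hit */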
2361       /* loop over columns*/
2362       for (j=0; j<nctot; j++) {
2363         if (ctype == IS_COLORING_GHOSTED) {
2364           col = ltog[cols[j]];
2365         } else {
2366           col = cols[j];
2367         }
2368         if (col >= cstart && col < cend) {
2369           /* column is in diagonal block of matrix */
2370           rows = A_cj + A_ci[col-cstart];
2371           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2372         } else {
2373 #if defined(PETSC_USE_CTABLE)
2374           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2375           colb--;
2376 #else
2377           colb = baij->colmap[col] - 1;
2378 #endif
2379           if (colb == -1) {
2380             m = 0;
2381           } else {
2382             colb = colb/bs;
2383             rows = B_cj + B_ci[colb];
2384             m    = B_ci[colb+1] - B_ci[colb];
2385           }
2386         }
2387         /* loop over columns marking them in rowhit */
2388         for (k=0; k<m; k++) {
2389           rowhit[*rows++] = col + 1;
2390         }
2391       }
2392 
2393       /* count the number of hits */
2394       nrows = 0;
2395       for (j=0; j<M; j++) {
2396         if (rowhit[j]) nrows++;
2397       }
2398       c->nrows[i] = nrows;
2399       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2400       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2401       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2402       nrows       = 0;
2403       for (j=0; j<M; j++) {
2404         if (rowhit[j]) {
2405           c->rows[i][nrows]          = j;
2406           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2407           nrows++;
2408         }
2409       }
2410     } else { /*-------------------------------------------------------------------------------*/
2411       /* slow version, using rowhit as a linked list */
2412       PetscInt currentcol,fm,mfm;
2413       rowhit[M] = M;
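      /* here rowhit is used as a sorted singly linked list of hit rows:
         rowhit[M] = M is the sentinel head, rowhit[r] points to the next hit
         row, and columnsforrow[r] records which column of this color hit row r;
         finding a row already in the list means two columns of the same color
         share a row, i.e. the coloring is invalid */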
2414       nrows     = 0;
2415       /* loop over columns*/
2416       for (j=0; j<nctot; j++) {
2417         if (ctype == IS_COLORING_GHOSTED) {
2418           col = ltog[cols[j]];
2419         } else {
2420           col = cols[j];
2421         }
2422         if (col >= cstart && col < cend) {
2423           /* column is in diagonal block of matrix */
2424           rows = A_cj + A_ci[col-cstart];
2425           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2426         } else {
2427 #if defined(PETSC_USE_CTABLE)
2428           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2429           colb--;
2430 #else
2431           colb = baij->colmap[col] - 1;
2432 #endif
2433           if (colb == -1) {
2434             m = 0;
2435           } else {
2436             colb = colb/bs;
2437             rows = B_cj + B_ci[colb];
2438             m    = B_ci[colb+1] - B_ci[colb];
2439           }
2440         }
2441 
2442         /* loop over the rows of this column, marking them in rowhit */
2443         fm = M;    /* fm points to first entry in linked list */
2444         for (k=0; k<m; k++) {
2445           currentcol = *rows++;
2446           /* is it already in the list? */
2447           do {
2448             mfm = fm;
2449             fm  = rowhit[fm];
2450           } while (fm < currentcol);
2451           /* not in list so add it */
2452           if (fm != currentcol) {
2453             nrows++;
2454             columnsforrow[currentcol] = col;
2455             /* next three lines insert new entry into linked list */
2456             rowhit[mfm]        = currentcol;
2457             rowhit[currentcol] = fm;
2458             fm                 = currentcol;
2459             /* fm points to present position in list since we know the columns are sorted */
2460           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2461         }
2462       }
2463       c->nrows[i] = nrows;
2464       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2465       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2466       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2467       /* now store the linked list of rows into c->rows[i] */
2468       nrows = 0;
2469       fm    = rowhit[M];
2470       do {
2471         c->rows[i][nrows]            = fm;
2472         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2473         fm                           = rowhit[fm];
2474       } while (fm < M);
2475     } /* ---------------------------------------------------------------------------------------*/
2476     ierr = PetscFree(cols);CHKERRQ(ierr);
2477   }
2478 
2479   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2480   /*
2481        vscale will contain the "diagonal" on processor scalings followed by the off processor
2482   */
2483   if (ctype == IS_COLORING_GLOBAL) {
2484     PetscInt *garray;
2485     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2486     for (i=0; i<baij->B->cmap->n/bs; i++) {
2487       for (j=0; j<bs; j++) {
2488         garray[i*bs+j] = bs*baij->garray[i]+j;
2489       }
2490     }
2491     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2492     ierr = PetscFree(garray);CHKERRQ(ierr);
2493     CHKMEMQ;
2494     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2495     for (k=0; k<c->ncolors; k++) {
2496       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2497       for (l=0; l<c->nrows[k]; l++) {
2498         col = c->columnsforrow[k][l];
2499         if (col >= cstart && col < cend) {
2500           /* column is in diagonal block of matrix */
2501           colb = col - cstart;
2502         } else {
2503           /* column  is in "off-processor" part */
2504 #if defined(PETSC_USE_CTABLE)
2505           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2506           colb--;
2507 #else
2508           colb = baij->colmap[col] - 1;
2509 #endif
2510           colb  = colb/bs;
2511           colb += cend - cstart;
2512         }
2513         c->vscaleforrow[k][l] = colb;
2514       }
2515     }
2516   } else if (ctype == IS_COLORING_GHOSTED) {
2517     /* Get gtol mapping */
2518     PetscInt N = mat->cmap->N,nlocal,*gtol;
2519     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2520     for (i=0; i<N; i++) gtol[i] = -1;
2521     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2522     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2523 
2524     c->vscale = 0; /* will be created in MatFDColoringApply() */
2525     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2526     for (k=0; k<c->ncolors; k++) {
2527       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2528       for (l=0; l<c->nrows[k]; l++) {
2529         col = c->columnsforrow[k][l];      /* global column index */
2530 
2531         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2532       }
2533     }
2534     ierr = PetscFree(gtol);CHKERRQ(ierr);
2535   }
2536   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2537 
2538   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2539   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2540   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2541   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2542   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2543   CHKMEMQ;
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2550 {
2551   Mat            B;
2552   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2553   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2554   Mat_SeqAIJ     *b;
2555   PetscErrorCode ierr;
2556   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2557   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2558   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2559 
2560   PetscFunctionBegin;
2561   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2562   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
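  /*
     Build, on every process, a sequential AIJ matrix with one entry per block
     that holds the block nonzero structure of the entire parallel matrix: the
     per-block-row lengths and the (sorted) global block column indices are
     assembled locally and then shared with MPI_Allgatherv().
  */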
2563 
2564   /* ----------------------------------------------------------------
2565      Tell every processor the number of nonzeros per row
2566   */
2567   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2568   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2569     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2570   }
2571   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2572   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2573   displs    = recvcounts + size;
2574   for (i=0; i<size; i++) {
2575     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2576     displs[i]     = A->rmap->range[i]/bs;
2577   }
2578 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2579   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2580 #else
2581   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2582 #endif
2583   /* ---------------------------------------------------------------
2584      Create the sequential matrix of the same type as the local block diagonal
2585   */
2586   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2587   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2588   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2589   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2590   b    = (Mat_SeqAIJ*)B->data;
2591 
2592   /*--------------------------------------------------------------------
2593     Copy my part of matrix column indices over
2594   */
2595   sendcount  = ad->nz + bd->nz;
2596   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2597   a_jsendbuf = ad->j;
2598   b_jsendbuf = bd->j;
2599   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2600   cnt        = 0;
2601   for (i=0; i<n; i++) {
2602 
2603     /* put in lower diagonal portion */
2604     m = bd->i[i+1] - bd->i[i];
2605     while (m > 0) {
2606       /* is it above diagonal (in bd (compressed) numbering) */
2607       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2608       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2609       m--;
2610     }
2611 
2612     /* put in diagonal portion */
2613     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2614       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2615     }
2616 
2617     /* put in upper diagonal portion */
2618     while (m-- > 0) {
2619       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2620     }
2621   }
2622   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2623 
2624   /*--------------------------------------------------------------------
2625     Gather all column indices to all processors
2626   */
2627   for (i=0; i<size; i++) {
2628     recvcounts[i] = 0;
2629     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2630       recvcounts[i] += lens[j];
2631     }
2632   }
2633   displs[0] = 0;
2634   for (i=1; i<size; i++) {
2635     displs[i] = displs[i-1] + recvcounts[i-1];
2636   }
2637 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2638   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2639 #else
2640   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2641 #endif
2642   /*--------------------------------------------------------------------
2643     Assemble the matrix into usable form (note the numerical values are not yet set)
2644   */
2645   /* set the b->ilen (length of each row) values */
2646   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2647   /* set the b->i indices */
2648   b->i[0] = 0;
2649   for (i=1; i<=A->rmap->N/bs; i++) {
2650     b->i[i] = b->i[i-1] + lens[i-1];
2651   }
2652   ierr = PetscFree(lens);CHKERRQ(ierr);
2653   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2655   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2656 
2657   if (A->symmetric) {
2658     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2659   } else if (A->hermitian) {
2660     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2661   } else if (A->structurally_symmetric) {
2662     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2663   }
2664   *newmat = B;
2665   PetscFunctionReturn(0);
2666 }
2667 
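/*
   Note on the implementation below: only the processor-local SOR variants are
   supported (the true parallel orderings error out).  Each outer iteration (its)
   scatters the current solution xx into the ghost vector mat->lvec, forms the
   updated right-hand side bb1 = bb - B*lvec (B is the off-diagonal block), and
   then applies lits sweeps of the sequential SOR kernel to the diagonal block A.
   Coupling between processes thus enters only through this right-hand-side
   update, i.e. the method is block Jacobi across processes with SOR inside each
   block.
*/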
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatSOR_MPIBAIJ"
2670 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2671 {
2672   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2673   PetscErrorCode ierr;
2674   Vec            bb1 = 0;
2675 
2676   PetscFunctionBegin;
2677   if (flag == SOR_APPLY_UPPER) {
2678     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2679     PetscFunctionReturn(0);
2680   }
2681 
2682   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2683     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2684   }
2685 
2686   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2687     if (flag & SOR_ZERO_INITIAL_GUESS) {
2688       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2689       its--;
2690     }
2691 
2692     while (its--) {
2693       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2694       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2695 
2696       /* update rhs: bb1 = bb - B*x */
2697       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2698       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2699 
2700       /* local sweep */
2701       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2702     }
2703   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2704     if (flag & SOR_ZERO_INITIAL_GUESS) {
2705       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2706       its--;
2707     }
2708     while (its--) {
2709       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2710       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2711 
2712       /* update rhs: bb1 = bb - B*x */
2713       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2714       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2715 
2716       /* local sweep */
2717       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2718     }
2719   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2720     if (flag & SOR_ZERO_INITIAL_GUESS) {
2721       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2722       its--;
2723     }
2724     while (its--) {
2725       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2726       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2727 
2728       /* update rhs: bb1 = bb - B*x */
2729       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2730       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2731 
2732       /* local sweep */
2733       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2734     }
2735   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2736 
2737   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2745 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2746 {
2747   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 
2755 
2756 /* -------------------------------------------------------------------*/
2757 static struct _MatOps MatOps_Values = {
2758        MatSetValues_MPIBAIJ,
2759        MatGetRow_MPIBAIJ,
2760        MatRestoreRow_MPIBAIJ,
2761        MatMult_MPIBAIJ,
2762 /* 4*/ MatMultAdd_MPIBAIJ,
2763        MatMultTranspose_MPIBAIJ,
2764        MatMultTransposeAdd_MPIBAIJ,
2765        0,
2766        0,
2767        0,
2768 /*10*/ 0,
2769        0,
2770        0,
2771        MatSOR_MPIBAIJ,
2772        MatTranspose_MPIBAIJ,
2773 /*15*/ MatGetInfo_MPIBAIJ,
2774        MatEqual_MPIBAIJ,
2775        MatGetDiagonal_MPIBAIJ,
2776        MatDiagonalScale_MPIBAIJ,
2777        MatNorm_MPIBAIJ,
2778 /*20*/ MatAssemblyBegin_MPIBAIJ,
2779        MatAssemblyEnd_MPIBAIJ,
2780        MatSetOption_MPIBAIJ,
2781        MatZeroEntries_MPIBAIJ,
2782 /*24*/ MatZeroRows_MPIBAIJ,
2783        0,
2784        0,
2785        0,
2786        0,
2787 /*29*/ MatSetUp_MPIBAIJ,
2788        0,
2789        0,
2790        0,
2791        0,
2792 /*34*/ MatDuplicate_MPIBAIJ,
2793        0,
2794        0,
2795        0,
2796        0,
2797 /*39*/ MatAXPY_MPIBAIJ,
2798        MatGetSubMatrices_MPIBAIJ,
2799        MatIncreaseOverlap_MPIBAIJ,
2800        MatGetValues_MPIBAIJ,
2801        MatCopy_MPIBAIJ,
2802 /*44*/ 0,
2803        MatScale_MPIBAIJ,
2804        0,
2805        0,
2806        0,
2807 /*49*/ 0,
2808        0,
2809        0,
2810        0,
2811        0,
2812 /*54*/ MatFDColoringCreate_MPIBAIJ,
2813        0,
2814        MatSetUnfactored_MPIBAIJ,
2815        MatPermute_MPIBAIJ,
2816        MatSetValuesBlocked_MPIBAIJ,
2817 /*59*/ MatGetSubMatrix_MPIBAIJ,
2818        MatDestroy_MPIBAIJ,
2819        MatView_MPIBAIJ,
2820        0,
2821        0,
2822 /*64*/ 0,
2823        0,
2824        0,
2825        0,
2826        0,
2827 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2828        0,
2829        0,
2830        0,
2831        0,
2832 /*74*/ 0,
2833        MatFDColoringApply_BAIJ,
2834        0,
2835        0,
2836        0,
2837 /*79*/ 0,
2838        0,
2839        0,
2840        0,
2841        MatLoad_MPIBAIJ,
2842 /*84*/ 0,
2843        0,
2844        0,
2845        0,
2846        0,
2847 /*89*/ 0,
2848        0,
2849        0,
2850        0,
2851        0,
2852 /*94*/ 0,
2853        0,
2854        0,
2855        0,
2856        0,
2857 /*99*/ 0,
2858        0,
2859        0,
2860        0,
2861        0,
2862 /*104*/0,
2863        MatRealPart_MPIBAIJ,
2864        MatImaginaryPart_MPIBAIJ,
2865        0,
2866        0,
2867 /*109*/0,
2868        0,
2869        0,
2870        0,
2871        0,
2872 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2873        0,
2874        MatGetGhosts_MPIBAIJ,
2875        0,
2876        0,
2877 /*119*/0,
2878        0,
2879        0,
2880        0,
2881        0,
2882 /*124*/0,
2883        0,
2884        MatInvertBlockDiagonal_MPIBAIJ
2885 };
2886 
2887 EXTERN_C_BEGIN
2888 #undef __FUNCT__
2889 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2890 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2891 {
2892   PetscFunctionBegin;
2893   *a = ((Mat_MPIBAIJ*)A->data)->A;
2894   PetscFunctionReturn(0);
2895 }
2896 EXTERN_C_END
2897 
2898 EXTERN_C_BEGIN
2899 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2900 EXTERN_C_END
2901 
2902 EXTERN_C_BEGIN
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2905 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2906 {
2907   PetscInt       m,rstart,cstart,cend;
2908   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2909   const PetscInt *JJ    =0;
2910   PetscScalar    *values=0;
2911   PetscErrorCode ierr;
2912 
2913   PetscFunctionBegin;
2914   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2915   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2916   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2917   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2918   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2919   m      = B->rmap->n/bs;
2920   rstart = B->rmap->rstart/bs;
2921   cstart = B->cmap->rstart/bs;
2922   cend   = B->cmap->rend/bs;
2923 
2924   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2925   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2926   for (i=0; i<m; i++) {
2927     nz = ii[i+1] - ii[i];
2928     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2929     nz_max = PetscMax(nz_max,nz);
2930     JJ     = jj + ii[i];
2931     for (j=0; j<nz; j++) {
2932       if (*JJ >= cstart) break;
2933       JJ++;
2934     }
2935     d = 0;
2936     for (; j<nz; j++) {
2937       if (*JJ++ >= cend) break;
2938       d++;
2939     }
2940     d_nnz[i] = d;
2941     o_nnz[i] = nz - d;
2942   }
2943   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2944   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2945 
2946   values = (PetscScalar*)V;
2947   if (!values) {
2948     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2949     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2950   }
2951   for (i=0; i<m; i++) {
2952     PetscInt          row    = i + rstart;
2953     PetscInt          ncols  = ii[i+1] - ii[i];
2954     const PetscInt    *icols = jj + ii[i];
2955     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2956     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2957   }
2958 
2959   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2960   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 EXTERN_C_END
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2969 /*@C
2970    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2971    (block compressed row).
2972 
2973    Collective on MPI_Comm
2974 
2975    Input Parameters:
2976 +  B - the matrix
2977 .  bs - the block size
2978 .  i - the indices into j for the start of each local row (starts with zero)
2979 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2980 -  v - optional values in the matrix
2981 
2982    Level: developer
2983 
2984 .keywords: matrix, aij, compressed row, sparse, parallel
2985 
2986 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
2987 @*/
2988 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2989 {
2990   PetscErrorCode ierr;
2991 
2992   PetscFunctionBegin;
2993   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2994   PetscValidType(B,1);
2995   PetscValidLogicalCollectiveInt(B,bs,2);
2996   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2997   PetscFunctionReturn(0);
2998 }
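
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJPreallocationCSRExample"
/*
   A minimal usage sketch for MatMPIBAIJSetPreallocationCSR(); the function name
   and the tiny layout are hypothetical and purely illustrative.  Each process
   passes its own block rows in block-CSR form: ii[] has one entry per local
   block row plus one, jj[] holds the sorted global block column indices of each
   row, and the values (NULL here) can be supplied afterwards with
   MatSetValuesBlocked() followed by the usual assembly calls.
*/
static PetscErrorCode MatMPIBAIJPreallocationCSRExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       bs   = 2;
  PetscInt       ii[] = {0,2,3};   /* two local block rows: 2 blocks in row 0, 1 block in row 1 */
  PetscInt       jj[] = {0,1,1};   /* global block column indices, sorted within each row */

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*A,bs,ii,jj,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}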
2999 
3000 EXTERN_C_BEGIN
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3003 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3004 {
3005   Mat_MPIBAIJ    *b;
3006   PetscErrorCode ierr;
3007   PetscInt       i;
3008   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3009 
3010   PetscFunctionBegin;
3011   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3012   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3013 
3014   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3015   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3016   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3017   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3018 
3019   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3020   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3021   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3022   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3023   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3024 
3025   if (d_nnz) {
3026     for (i=0; i<B->rmap->n/bs; i++) {
3027       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3028     }
3029   }
3030   if (o_nnz) {
3031     for (i=0; i<B->rmap->n/bs; i++) {
3032       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3033     }
3034   }
3035 
3036   b      = (Mat_MPIBAIJ*)B->data;
3037   b->bs2 = bs*bs;
3038   b->mbs = B->rmap->n/bs;
3039   b->nbs = B->cmap->n/bs;
3040   b->Mbs = B->rmap->N/bs;
3041   b->Nbs = B->cmap->N/bs;
3042 
3043   for (i=0; i<=b->size; i++) {
3044     b->rangebs[i] = B->rmap->range[i]/bs;
3045   }
3046   b->rstartbs = B->rmap->rstart/bs;
3047   b->rendbs   = B->rmap->rend/bs;
3048   b->cstartbs = B->cmap->rstart/bs;
3049   b->cendbs   = B->cmap->rend/bs;
3050 
3051   if (!B->preallocated) {
3052     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3053     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3054     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3055     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3056     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3057     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3058     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3059     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3060     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3061   }
3062 
3063   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3064   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3065   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3066   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3067   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3068   B->preallocated = PETSC_TRUE;
3069   PetscFunctionReturn(0);
3070 }
3071 EXTERN_C_END
3072 
3073 EXTERN_C_BEGIN
3074 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3075 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3076 EXTERN_C_END
3077 
3078 
3079 EXTERN_C_BEGIN
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3082 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3083 {
3084   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3085   PetscErrorCode ierr;
3086   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3087   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3088   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3089 
3090   PetscFunctionBegin;
3091   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3092   ii[0] = 0;
3093   CHKMEMQ;
3094   for (i=0; i<M; i++) {
3095     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3096     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3097     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3098     /* remove one from count if matrix has a diagonal entry */
3099     for (j=id[i]; j<id[i+1]; j++) {
3100       if (jd[j] == i) {ii[i+1]--;break;}
3101     }
3102     CHKMEMQ;
3103   }
3104   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3105   cnt  = 0;
3106   for (i=0; i<M; i++) {
3107     for (j=io[i]; j<io[i+1]; j++) {
3108       if (garray[jo[j]] > rstart) break;
3109       jj[cnt++] = garray[jo[j]];
3110       CHKMEMQ;
3111     }
3112     for (k=id[i]; k<id[i+1]; k++) {
3113       if (jd[k] != i) {
3114         jj[cnt++] = rstart + jd[k];
3115         CHKMEMQ;
3116       }
3117     }
3118     for (; j<io[i+1]; j++) {
3119       jj[cnt++] = garray[jo[j]];
3120       CHKMEMQ;
3121     }
3122   }
3123   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3124   PetscFunctionReturn(0);
3125 }
3126 EXTERN_C_END
3127 
3128 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3129 EXTERN_C_BEGIN
3130 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3131 EXTERN_C_END
3132 
3133 EXTERN_C_BEGIN
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3136 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3137 {
3138   PetscErrorCode ierr;
3139   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3140   Mat            B;
3141   Mat_MPIAIJ     *b;
3142 
3143   PetscFunctionBegin;
3144   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3145 
3146   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3147   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3148   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3149   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3150   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3151   b    = (Mat_MPIAIJ*) B->data;
3152 
3153   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3154   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3155   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3156   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3157   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   if (reuse == MAT_REUSE_MATRIX) {
3163     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3164   } else {
3165    *newmat = B;
3166   }
3167   PetscFunctionReturn(0);
3168 }
3169 EXTERN_C_END
3170 
3171 EXTERN_C_BEGIN
3172 #if defined(PETSC_HAVE_MUMPS)
3173 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3174 #endif
3175 EXTERN_C_END
3176 
3177 /*MC
3178    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3179 
3180    Options Database Keys:
3181 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3182 . -mat_block_size <bs> - set the blocksize used to store the matrix
3183 - -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory during matrix assembly
3184 
3185   Level: beginner
3186 
3187 .seealso: MatCreateBAIJ()
3188 M*/
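
/*
   Example (illustrative): for an application executable, say ./app, that calls
   MatSetFromOptions() on its matrix, the type and block size can be selected at
   run time with

       mpiexec -n 4 ./app -mat_type mpibaij -mat_block_size 3

   The -mat_use_hash_table option is processed in MatCreate_MPIBAIJ() below.
*/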
3189 
3190 EXTERN_C_BEGIN
3191 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3192 EXTERN_C_END
3193 
3194 EXTERN_C_BEGIN
3195 #undef __FUNCT__
3196 #define __FUNCT__ "MatCreate_MPIBAIJ"
3197 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3198 {
3199   Mat_MPIBAIJ    *b;
3200   PetscErrorCode ierr;
3201   PetscBool      flg;
3202 
3203   PetscFunctionBegin;
3204   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3205   B->data = (void*)b;
3206 
3207   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3208   B->assembled = PETSC_FALSE;
3209 
3210   B->insertmode = NOT_SET_VALUES;
3211   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3212   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3213 
3214   /* build local table of row and column ownerships */
3215   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3216 
3217   /* build cache for off array entries formed */
3218   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3219 
3220   b->donotstash  = PETSC_FALSE;
3221   b->colmap      = NULL;
3222   b->garray      = NULL;
3223   b->roworiented = PETSC_TRUE;
3224 
3225   /* stuff used in block assembly */
3226   b->barray = 0;
3227 
3228   /* stuff used for matrix vector multiply */
3229   b->lvec  = 0;
3230   b->Mvctx = 0;
3231 
3232   /* stuff for MatGetRow() */
3233   b->rowindices   = 0;
3234   b->rowvalues    = 0;
3235   b->getrowactive = PETSC_FALSE;
3236 
3237   /* hash table stuff */
3238   b->ht           = 0;
3239   b->hd           = 0;
3240   b->ht_size      = 0;
3241   b->ht_flag      = PETSC_FALSE;
3242   b->ht_fact      = 0;
3243   b->ht_total_ct  = 0;
3244   b->ht_insert_ct = 0;
3245 
3246   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3247   b->ijonly = PETSC_FALSE;
3248 
3249   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3250   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3251   if (flg) {
3252     PetscReal fact = 1.39;
3253     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3254     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3255     if (fact <= 1.0) fact = 1.39;
3256     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3257     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3258   }
3259   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3260 
3261 #if defined(PETSC_HAVE_MUMPS)
3262   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3263 #endif
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3265                                            "MatConvert_MPIBAIJ_MPIAdj",
3266                                            MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3268                                            "MatConvert_MPIBAIJ_MPIAIJ",
3269                                            MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3271                                            "MatConvert_MPIBAIJ_MPISBAIJ",
3272                                            MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3274                                            "MatStoreValues_MPIBAIJ",
3275                                            MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3277                                            "MatRetrieveValues_MPIBAIJ",
3278                                            MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3280                                            "MatGetDiagonalBlock_MPIBAIJ",
3281                                            MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3283                                            "MatMPIBAIJSetPreallocation_MPIBAIJ",
3284                                            MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3286                                            "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3287                                            MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3289                                            "MatDiagonalScaleLocal_MPIBAIJ",
3290                                            MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3292                                            "MatSetHashTableFactor_MPIBAIJ",
3293                                            MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3295                                            "MatConvert_MPIBAIJ_MPIBSTRM",
3296                                            MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3297   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3298   PetscFunctionReturn(0);
3299 }
3300 EXTERN_C_END
3301 
3302 /*MC
3303    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3304 
3305    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3306    and MATMPIBAIJ otherwise.
3307 
3308    Options Database Keys:
3309 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3310 
3311   Level: beginner
3312 
3313 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3314 M*/
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3318 /*@C
3319    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3320    (block compressed row).  For good matrix assembly performance
3321    the user should preallocate the matrix storage by setting the parameters
3322    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3323    performance can be increased by more than a factor of 50.
3324 
3325    Collective on Mat
3326 
3327    Input Parameters:
3328 +  B - the matrix
3329 .  bs   - size of block
3330 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3331            submatrix  (same for all local rows)
3332 .  d_nnz - array containing the number of block nonzeros in the various block rows
3333            in the diagonal portion of the local submatrix (possibly different for each block
3334            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3335            set it even if it is zero.
3336 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3337            submatrix (same for all local rows).
3338 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3339            off-diagonal portion of the local submatrix (possibly different for
3340            each block row) or NULL.
3341 
3342    If the *_nnz parameter is given then the *_nz parameter is ignored
3343 
3344    Options Database Keys:
3345 +   -mat_block_size - size of the blocks to use
3346 -   -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory during matrix assembly
3347 
3348    Notes:
3349    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3350    then it must be used on all processors that share the object for that argument.
3351 
3352    Storage Information:
3353    For a square global matrix we define each processor's diagonal portion
3354    to be its local rows and the corresponding columns (a square submatrix);
3355    each processor's off-diagonal portion encompasses the remainder of the
3356    local matrix (a rectangular submatrix).
3357 
3358    The user can specify preallocated storage for the diagonal part of
3359    the local submatrix with either d_nz or d_nnz (not both).  Set
3360    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3361    memory allocation.  Likewise, specify preallocated storage for the
3362    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3363 
3364    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3365    the figure below we depict these three local rows and all columns (0-11).
3366 
3367 .vb
3368            0 1 2 3 4 5 6 7 8 9 10 11
3369           -------------------
3370    row 3  |  o o o d d d o o o o o o
3371    row 4  |  o o o d d d o o o o o o
3372    row 5  |  o o o d d d o o o o o o
3373           -------------------
3374 .ve
3375 
3376    Thus, any entries in the d locations are stored in the d (diagonal)
3377    submatrix, and any entries in the o locations are stored in the
3378    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3379    stored simply in the MATSEQBAIJ format for compressed row storage.
3380 
3381    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3382    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3383    In general, for PDE problems in which most nonzeros are near the diagonal,
3384    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3385    or you will get TERRIBLE performance; see the users' manual chapter on
3386    matrices.
3387 
3388    You can call MatGetInfo() to get information on how effective the preallocation was;
3389    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3390    You can also run with the option -info and look for messages with the string
3391    malloc in them to see if additional memory allocation was needed.
3392 
3393    Level: intermediate
3394 
3395 .keywords: matrix, block, aij, compressed row, sparse, parallel
3396 
3397 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3398 @*/
3399 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3400 {
3401   PetscErrorCode ierr;
3402 
3403   PetscFunctionBegin;
3404   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3405   PetscValidType(B,1);
3406   PetscValidLogicalCollectiveInt(B,bs,2);
3407   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3408   PetscFunctionReturn(0);
3409 }
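
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJPreallocationExample"
/*
   A minimal sketch of the MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation()
   paradigm documented above.  The function name and the nonzero estimates are
   hypothetical; mloc is the number of locally owned rows (a multiple of bs), and
   instead of the uniform guesses one would normally pass per-block-row d_nnz and
   o_nnz arrays.
*/
static PetscErrorCode MatMPIBAIJPreallocationExample(MPI_Comm comm,PetscInt bs,PetscInt mloc,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
  /* guess 3 block nonzeros per block row in the diagonal part and 1 in the off-diagonal part */
  ierr = MatMPIBAIJSetPreallocation(*A,bs,3,NULL,1,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}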
3410 
3411 #undef __FUNCT__
3412 #define __FUNCT__ "MatCreateBAIJ"
3413 /*@C
3414    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3415    (block compressed row).  For good matrix assembly performance
3416    the user should preallocate the matrix storage by setting the parameters
3417    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3418    performance can be increased by more than a factor of 50.
3419 
3420    Collective on MPI_Comm
3421 
3422    Input Parameters:
3423 +  comm - MPI communicator
3424 .  bs   - size of block
3425 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3426            This value should be the same as the local size used in creating the
3427            y vector for the matrix-vector product y = Ax.
3428 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3429            This value should be the same as the local size used in creating the
3430            x vector for the matrix-vector product y = Ax.
3431 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3432 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3433 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3434            submatrix  (same for all local rows)
3435 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3436            in the diagonal portion of the local submatrix (possibly different for each block
3437            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3438            and set it even if it is zero.
3439 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3440            submatrix (same for all local rows).
3441 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3442            off-diagonal portion of the local submatrix (possibly different for
3443            each block row) or NULL.
3444 
3445    Output Parameter:
3446 .  A - the matrix
3447 
3448    Options Database Keys:
3449 +   -mat_block_size - size of the blocks to use
3450 -   -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory during matrix assembly
3451 
3452    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3453    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3454    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3455 
3456    Notes:
3457    If the *_nnz parameter is given then the *_nz parameter is ignored
3458 
3459    A nonzero block is any block that has 1 or more nonzeros in it
3460 
3461    The user MUST specify either the local or global matrix dimensions
3462    (possibly both).
3463 
3464    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3465    then it must be used on all processors that share the object for that argument.
3466 
3467    Storage Information:
3468    For a square global matrix we define each processor's diagonal portion
3469    to be its local rows and the corresponding columns (a square submatrix);
3470    each processor's off-diagonal portion encompasses the remainder of the
3471    local matrix (a rectangular submatrix).
3472 
3473    The user can specify preallocated storage for the diagonal part of
3474    the local submatrix with either d_nz or d_nnz (not both).  Set
3475    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3476    memory allocation.  Likewise, specify preallocated storage for the
3477    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3478 
3479    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3480    the figure below we depict these three local rows and all columns (0-11).
3481 
3482 .vb
3483            0 1 2 3 4 5 6 7 8 9 10 11
3484           -------------------
3485    row 3  |  o o o d d d o o o o o o
3486    row 4  |  o o o d d d o o o o o o
3487    row 5  |  o o o d d d o o o o o o
3488           -------------------
3489 .ve
3490 
3491    Thus, any entries in the d locations are stored in the d (diagonal)
3492    submatrix, and any entries in the o locations are stored in the
3493    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3494    stored simply in the MATSEQBAIJ format for compressed row storage.
3495 
3496    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3497    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3498    In general, for PDE problems in which most nonzeros are near the diagonal,
3499    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3500    or you will get TERRIBLE performance; see the users' manual chapter on
3501    matrices.
3502 
3503    Level: intermediate
3504 
3505 .keywords: matrix, block, aij, compressed row, sparse, parallel
3506 
3507 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3508 @*/
3509 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3510 {
3511   PetscErrorCode ierr;
3512   PetscMPIInt    size;
3513 
3514   PetscFunctionBegin;
3515   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3516   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3518   if (size > 1) {
3519     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3520     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3521   } else {
3522     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3523     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3524   }
3525   PetscFunctionReturn(0);
3526 }
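
#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJExample"
/*
   A minimal sketch of the convenience form above; the name and the numbers are
   hypothetical: 2x2 blocks, 4 local rows and columns per process, global sizes
   left to PETSc, and uniform guesses of 3 diagonal and 1 off-diagonal block
   nonzeros per block row.
*/
static PetscErrorCode MatCreateBAIJExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateBAIJ(comm,2,4,4,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}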
3527 
3528 #undef __FUNCT__
3529 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3530 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3531 {
3532   Mat            mat;
3533   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3534   PetscErrorCode ierr;
3535   PetscInt       len=0;
3536 
3537   PetscFunctionBegin;
3538   *newmat = 0;
3539   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3540   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3541   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3542   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3543 
3544   mat->factortype   = matin->factortype;
3545   mat->preallocated = PETSC_TRUE;
3546   mat->assembled    = PETSC_TRUE;
3547   mat->insertmode   = NOT_SET_VALUES;
3548 
3549   a             = (Mat_MPIBAIJ*)mat->data;
3550   mat->rmap->bs = matin->rmap->bs;
3551   a->bs2        = oldmat->bs2;
3552   a->mbs        = oldmat->mbs;
3553   a->nbs        = oldmat->nbs;
3554   a->Mbs        = oldmat->Mbs;
3555   a->Nbs        = oldmat->Nbs;
3556 
3557   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3558   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3559 
3560   a->size         = oldmat->size;
3561   a->rank         = oldmat->rank;
3562   a->donotstash   = oldmat->donotstash;
3563   a->roworiented  = oldmat->roworiented;
3564   a->rowindices   = 0;
3565   a->rowvalues    = 0;
3566   a->getrowactive = PETSC_FALSE;
3567   a->barray       = 0;
3568   a->rstartbs     = oldmat->rstartbs;
3569   a->rendbs       = oldmat->rendbs;
3570   a->cstartbs     = oldmat->cstartbs;
3571   a->cendbs       = oldmat->cendbs;
3572 
3573   /* hash table stuff */
3574   a->ht           = 0;
3575   a->hd           = 0;
3576   a->ht_size      = 0;
3577   a->ht_flag      = oldmat->ht_flag;
3578   a->ht_fact      = oldmat->ht_fact;
3579   a->ht_total_ct  = 0;
3580   a->ht_insert_ct = 0;
3581 
3582   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3583   if (oldmat->colmap) {
3584 #if defined(PETSC_USE_CTABLE)
3585     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3586 #else
3587     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3588     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3589     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3590 #endif
3591   } else a->colmap = 0;
3592 
3593   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3594     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3595     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3596     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3597   } else a->garray = 0;
3598 
3599   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3600   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3601   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3602   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3603   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3604 
3605   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3606   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3607   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3608   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3609   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3610   *newmat = mat;
3611   PetscFunctionReturn(0);
3612 }
3613 
3614 #undef __FUNCT__
3615 #define __FUNCT__ "MatLoad_MPIBAIJ"
3616 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3617 {
3618   PetscErrorCode ierr;
3619   int            fd;
3620   PetscInt       i,nz,j,rstart,rend;
3621   PetscScalar    *vals,*buf;
3622   MPI_Comm       comm;
3623   MPI_Status     status;
3624   PetscMPIInt    rank,size,maxnz;
3625   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3626   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3627   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3628   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3629   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3630   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3631 
3632   PetscFunctionBegin;
3633   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3634   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3635   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3636   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3637 
3638   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3639   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3640   if (!rank) {
3641     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3642     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3643     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3644   }
3645 
3646   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3647 
3648   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3649   M    = header[1]; N = header[2];
3650 
3651   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3652   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3653   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3654 
3655   /* If global sizes are set, check if they are consistent with that given in the file */
3656   if (sizesset) {
3657     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3658   }
3659   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3660   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3661 
3662   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3663 
3664   /*
3665      This code adds extra rows to make sure the number of rows is
3666      divisible by the blocksize
3667   */
3668   Mbs        = M/bs;
3669   extra_rows = bs - M + bs*Mbs;
3670   if (extra_rows == bs) extra_rows = 0;
3671   else                  Mbs++;
3672   if (extra_rows && !rank) {
3673     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3674   }
3675 
3676   /* determine ownership of all rows */
3677   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3678     mbs = Mbs/size + ((Mbs % size) > rank);
3679     m   = mbs*bs;
3680   } else { /* User set */
3681     m   = newmat->rmap->n;
3682     mbs = m/bs;
3683   }
3684   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3685   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3686 
3687   /* process 0 needs enough room for process with most rows */
3688   if (!rank) {
3689     mmax = rowners[1];
3690     for (i=2; i<=size; i++) {
3691       mmax = PetscMax(mmax,rowners[i]);
3692     }
3693     mmax*=bs;
3694   } else mmax = m;
3695 
3696   rowners[0] = 0;
3697   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3698   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3699   rstart = rowners[rank];
3700   rend   = rowners[rank+1];
3701 
3702   /* distribute row lengths to all processors */
3703   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3704   if (!rank) {
3705     mend = m;
3706     if (size == 1) mend = mend - extra_rows;
3707     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3708     for (j=mend; j<m; j++) locrowlens[j] = 1;
3709     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3710     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3711     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3712     for (j=0; j<m; j++) {
3713       procsnz[0] += locrowlens[j];
3714     }
3715     for (i=1; i<size; i++) {
3716       mend = browners[i+1] - browners[i];
3717       if (i == size-1) mend = mend - extra_rows;
3718       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3719       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3720       /* calculate the number of nonzeros on each processor */
3721       for (j=0; j<browners[i+1]-browners[i]; j++) {
3722         procsnz[i] += rowlengths[j];
3723       }
3724       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3725     }
3726     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3727   } else {
3728     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3729   }
3730 
3731   if (!rank) {
3732     /* determine max buffer needed and allocate it */
3733     maxnz = procsnz[0];
3734     for (i=1; i<size; i++) {
3735       maxnz = PetscMax(maxnz,procsnz[i]);
3736     }
3737     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3738 
3739     /* read in my part of the matrix column indices  */
3740     nz     = procsnz[0];
3741     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3742     mycols = ibuf;
3743     if (size == 1) nz -= extra_rows;
3744     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3745     if (size == 1) {
3746       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3747     }
3748 
3749     /* read in the other processes' parts (except the last) and ship them off */
3750     for (i=1; i<size-1; i++) {
3751       nz   = procsnz[i];
3752       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3753       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3754     }
3755     /* read in the stuff for the last proc */
3756     if (size != 1) {
3757       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3758       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3759       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3760       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3761     }
3762     ierr = PetscFree(cols);CHKERRQ(ierr);
3763   } else {
3764     /* determine buffer space needed for message */
3765     nz = 0;
3766     for (i=0; i<m; i++) {
3767       nz += locrowlens[i];
3768     }
3769     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3770     mycols = ibuf;
3771     /* receive message of column indices*/
3772     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3773     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3774     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3775   }
3776 
3777   /* loop over local rows, determining number of off diagonal entries */
3778   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3779   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3780   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3781   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3782   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3783   rowcount = 0; nzcount = 0;
3784   for (i=0; i<mbs; i++) {
3785     dcount  = 0;
3786     odcount = 0;
3787     for (j=0; j<bs; j++) {
3788       kmax = locrowlens[rowcount];
3789       for (k=0; k<kmax; k++) {
3790         tmp = mycols[nzcount++]/bs;
3791         if (!mask[tmp]) {
3792           mask[tmp] = 1;
3793           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3794           else masked1[dcount++] = tmp;
3795         }
3796       }
3797       rowcount++;
3798     }
3799 
3800     dlens[i]  = dcount;
3801     odlens[i] = odcount;
3802 
3803     /* zero out the mask elements we set */
3804     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3805     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3806   }
3807 
3808 
3809   if (!sizesset) {
3810     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3811   }
3812   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3813 
3814   if (!rank) {
3815     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3816     /* read in my part of the matrix numerical values  */
3817     nz     = procsnz[0];
3818     vals   = buf;
3819     mycols = ibuf;
3820     if (size == 1) nz -= extra_rows;
3821     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3822     if (size == 1) {
3823       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3824     }
3825 
3826     /* insert into matrix */
3827     jj = rstart*bs;
3828     for (i=0; i<m; i++) {
3829       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3830       mycols += locrowlens[i];
3831       vals   += locrowlens[i];
3832       jj++;
3833     }
3834     /* read in other processors (except the last one) and ship out */
3835     for (i=1; i<size-1; i++) {
3836       nz   = procsnz[i];
3837       vals = buf;
3838       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3839       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3840     }
3841     /* the last proc */
3842     if (size != 1) {
3843       nz   = procsnz[i] - extra_rows;
3844       vals = buf;
3845       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3846       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3847       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3848     }
3849     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3850   } else {
3851     /* receive numeric values */
3852     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3853 
3854     /* receive message of values*/
3855     vals   = buf;
3856     mycols = ibuf;
3857     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3858 
3859     /* insert into matrix */
3860     jj = rstart*bs;
3861     for (i=0; i<m; i++) {
3862       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3863       mycols += locrowlens[i];
3864       vals   += locrowlens[i];
3865       jj++;
3866     }
3867   }
3868   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3869   ierr = PetscFree(buf);CHKERRQ(ierr);
3870   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3871   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3872   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3873   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3874   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3875   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3876   PetscFunctionReturn(0);
3877 }
3878 
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3881 /*@
3882    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3883 
3884    Input Parameters:
3885 .  mat  - the matrix
3886 +  mat  - the matrix
3887 -  fact - factor
3888    Not Collective, each process can use a different factor
3889 
3890    Level: advanced
3891 
3892   Notes:
3893    This can also be set by the command line option: -mat_use_hash_table <fact>
3894 
3895 .keywords: matrix, hashtable, factor, HT
3896 
3897 .seealso: MatSetOption()
3898 @*/
3899 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3900 {
3901   PetscErrorCode ierr;
3902 
3903   PetscFunctionBegin;
3904   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3905   PetscFunctionReturn(0);
3906 }
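
/*
   Illustrative call sequence (A denotes a hypothetical MPIBAIJ matrix); the
   factor only takes effect when hash-table based assembly is enabled:

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/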
3907 
3908 EXTERN_C_BEGIN
3909 #undef __FUNCT__
3910 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3911 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3912 {
3913   Mat_MPIBAIJ *baij;
3914 
3915   PetscFunctionBegin;
3916   baij          = (Mat_MPIBAIJ*)mat->data;
3917   baij->ht_fact = fact;
3918   PetscFunctionReturn(0);
3919 }
3920 EXTERN_C_END
3921 
3922 #undef __FUNCT__
3923 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3924 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3925 {
3926   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3927 
3928   PetscFunctionBegin;
3929   *Ad     = a->A;
3930   *Ao     = a->B;
3931   *colmap = a->garray;
3932   PetscFunctionReturn(0);
3933 }
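
/*
   Example (illustrative, A denotes an assembled MPIBAIJ matrix): access the two
   sequential blocks and the column map,

     Mat            Ad,Ao;
     const PetscInt *colmap;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   Ad holds the block rows and block columns owned by this process, Ao holds the
   remaining (off-process) block columns in compressed form, and colmap[j] is the
   global block column corresponding to local block column j of Ao.
*/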
3934 
3935 /*
3936     Special version for direct calls from Fortran (to eliminate two function call overheads)
3937 */
3938 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3939 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3941 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3942 #endif
3943 
3944 #undef __FUNCT__
3945 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3946 /*@C
3947   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3948 
3949   Collective on Mat
3950 
3951   Input Parameters:
3952 + mat - the matrix
3953 . min - number of input rows
3954 . im - input rows
3955 . nin - number of input columns
3956 . in - input columns
3957 . v - numerical values input
3958 - addvin - INSERT_VALUES or ADD_VALUES
3959 
3960   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3961 
3962   Level: advanced
3963 
3964 .seealso:   MatSetValuesBlocked()
3965 @*/
3966 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3967 {
3968   /* convert input arguments to C version */
3969   Mat        mat  = *matin;
3970   PetscInt   m    = *min, n = *nin;
3971   InsertMode addv = *addvin;
3972 
3973   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3974   const MatScalar *value;
3975   MatScalar       *barray     = baij->barray;
3976   PetscBool       roworiented = baij->roworiented;
3977   PetscErrorCode  ierr;
3978   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3979   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3980   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3981 
3982   PetscFunctionBegin;
3983   /* tasks normally handled by MatSetValuesBlocked() */
3984   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3985 #if defined(PETSC_USE_DEBUG)
3986   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3987   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3988 #endif
3989   if (mat->assembled) {
3990     mat->was_assembled = PETSC_TRUE;
3991     mat->assembled     = PETSC_FALSE;
3992   }
3993   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3994 
3995 
3996   if (!barray) {
3997     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3998     baij->barray = barray;
3999   }
4000 
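  /* stepval: number of scalars to skip in v to get from the end of one row of a bs x bs block to the start of that block's next row */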
4001   if (roworiented) stepval = (n-1)*bs;
4002   else stepval = (m-1)*bs;
4003 
4004   for (i=0; i<m; i++) {
4005     if (im[i] < 0) continue;
4006 #if defined(PETSC_USE_DEBUG)
4007     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4008 #endif
4009     if (im[i] >= rstart && im[i] < rend) {
4010       row = im[i] - rstart;
4011       for (j=0; j<n; j++) {
4012         /* When there is a single block column (row-oriented) or a single block row (column-oriented), the block is already contiguous in v and no copy is required */
4013         if ((roworiented) && (n == 1)) {
4014           barray = (MatScalar*)v + i*bs2;
4015         } else if ((!roworiented) && (m == 1)) {
4016           barray = (MatScalar*)v + j*bs2;
4017         } else { /* Here a copy is required */
4018           if (roworiented) {
4019             value = v + i*(stepval+bs)*bs + j*bs;
4020           } else {
4021             value = v + j*(stepval+bs)*bs + i*bs;
4022           }
4023           for (ii=0; ii<bs; ii++,value+=stepval) {
4024             for (jj=0; jj<bs; jj++) {
4025               *barray++ = *value++;
4026             }
4027           }
4028           barray -=bs2;
4029         }
4030 
4031         if (in[j] >= cstart && in[j] < cend) {
4032           col  = in[j] - cstart;
4033           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4034         } else if (in[j] < 0) continue;
4035 #if defined(PETSC_USE_DEBUG)
4036         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4037 #endif
4038         else {
4039           if (mat->was_assembled) {
4040             if (!baij->colmap) {
4041               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4042             }
4043 
4044 #if defined(PETSC_USE_DEBUG)
4045 #if defined(PETSC_USE_CTABLE)
4046             { PetscInt data;
4047               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4048               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4049             }
4050 #else
4051             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4052 #endif
4053 #endif
4054 #if defined(PETSC_USE_CTABLE)
4055             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4056             col  = (col - 1)/bs;
4057 #else
4058             col = (baij->colmap[in[j]] - 1)/bs;
4059 #endif
4060             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4061               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4062               col  =  in[j];
4063             }
4064           } else col = in[j];
4065           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4066         }
4067       }
4068     } else {
4069       if (!baij->donotstash) {
4070         if (roworiented) {
4071           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4072         } else {
4073           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4074         }
4075       }
4076     }
4077   }
4078 
4079   /* task normally handled by MatSetValuesBlocked() */
4080   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4081   PetscFunctionReturn(0);
4082 }
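
/*
   For reference (illustrative only, not part of the original source): the copy
   loop above unpacks blocks from the caller's array v. With bs = 2 and a
   row-oriented caller, a single 2x2 block for block row 0, block column 0 is
   laid out row by row (first row 1.0,2.0 then second row 3.0,4.0), and the
   equivalent public C call would be:

     PetscInt       row = 0,col = 0;
     PetscScalar    vals[4] = {1.0,2.0,3.0,4.0};
     PetscErrorCode ierr;
     ierr = MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
*/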
4083 
4084 #undef __FUNCT__
4085 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4086 /*@
4087      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4088          in standard CSR format.
4089 
4090    Collective on MPI_Comm
4091 
4092    Input Parameters:
4093 +  comm - MPI communicator
4094 .  bs - the block size, only a block size of 1 is supported
4095 .  m - number of local rows (Cannot be PETSC_DECIDE)
4096 .  n - This value should be the same as the local size used in creating the
4097        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4098        it calculated if N is given). For square matrices n is almost always m.
4099 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4100 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4101 .   i - row indices
4102 .   j - column indices
4103 -   a - matrix values
4104 
4105    Output Parameter:
4106 .   mat - the matrix
4107 
4108    Level: intermediate
4109 
4110    Notes:
4111        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4112      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4113      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4114 
4115        The i and j indices are 0 based; the entries of i[] are offsets (row pointers) into the local j[] array.
4116 
4117 .keywords: matrix, aij, compressed row, sparse, parallel
4118 
4119 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4120           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4121 @*/
4122 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4123 {
4124   PetscErrorCode ierr;
4125 
4126   PetscFunctionBegin;
4127   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4128   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4129   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4130   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4131   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4132   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4133   PetscFunctionReturn(0);
4134 }
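
/*
   A minimal calling sketch (illustrative only, not part of the original source):
   with bs = 1, two local rows, and a 2x2 global matrix owned by a single
   process, the CSR input would look like the following; jj[] holds global
   column indices and ii[] holds offsets into jj[]/aa[].

     PetscInt       ii[] = {0,2,4};
     PetscInt       jj[] = {0,1,0,1};
     PetscScalar    aa[] = {2.0,-1.0,-1.0,2.0};
     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
*/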
4135