xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap is proportional to the global number of columns.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
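
/*
   A tiny worked example of the garray/colmap relationship (illustrative only;
   the numbers are hypothetical): with bs = 2 and garray = {3,7} (the global
   block columns present in the off-diagonal part B), the non-CTABLE colmap is
   a global array with colmap[3] = 0*bs+1 = 1 and colmap[7] = 1*bs+1 = 3, all
   other entries 0.  A lookup of global block column c then yields the local
   point-column offset colmap[c]-1, with a result < 0 meaning "not present";
   the +1 shift is what lets 0 double as the missing-entry marker.
*/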

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
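
/*
   Both macros above locate block column col/bs within block row row/bs by a
   short binary search followed by a linear scan, then address the scalar
   entry inside the bs x bs block as ap[bs2*_i + bs*cidx + ridx].  Blocks are
   stored column-major, so for bs = 3 the entry at block-local (row,col) =
   (ridx,cidx) = (2,1) lives at offset 3*1 + 2 = 5 within the block.  If the
   block column is absent and new nonzeros are allowed, the later entries in
   the row are shifted up and a zeroed block is inserted in its place.
*/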

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
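
/*
   A minimal usage sketch for the routine above (hypothetical driver code, not
   part of this file; all calls are standard public PETSc API).  Locally owned
   entries go straight into the A or B piece via the macros; off-process
   entries are stashed here and communicated during assembly:

     Mat         M;
     PetscInt    row = 0, col = 0;
     PetscScalar val = 1.0;
     ierr = MatCreate(PETSC_COMM_WORLD,&M);CHKERRQ(ierr);
     ierr = MatSetSizes(M,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
     ierr = MatSetType(M,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(M,2,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValues(M,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/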

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row-oriented) or one block row (column-oriented),
           the input block is already contiguous and a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
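
/*
   A usage sketch for the blocked routine above (hypothetical driver code,
   reusing the matrix M from the earlier sketch): with bs = 2 the caller
   passes block indices and a full bs*bs block of values, row-oriented by
   default, through the public entry point:

     PetscInt    brow = 0, bcol = 0;             (block row/column indices)
     PetscScalar block[4] = {1.0,2.0,3.0,4.0};   (one 2x2 block)
     ierr = MatSetValuesBlocked(M,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/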

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
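
/*
   The HASH macro implements golden-ratio multiplicative hashing: the key is
   multiplied by HASH_KEY, the fractional part is kept, and the result is
   scaled by the table size.  A worked example: key = 7 and size = 11 gives
   tmp = 7*0.6180339887 = 4.3262..., fractional part 0.3262..., so
   h1 = (PetscInt)(11*0.3262...) = 3.  Collisions are resolved by the linear
   probing loops in the routines below.
*/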
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

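/*
   MatNorm_MPIBAIJ computes, over the global matrix,
     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_{i,j} |a_ij|^2)  (allreduce of the local sums)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|        (max column sum)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|        (max row sum)
   walking the bs x bs blocks of both the diagonal (A) and off-diagonal (B)
   local pieces.
*/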
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this process had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

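/*
   Assembly is a two-phase protocol: MatAssemblyBegin() above initiates the
   scatter of stashed off-process entries (both the point stash and the block
   stash), and MatAssemblyEnd() below receives the messages, replays them
   through MatSetValues_MPIBAIJ()/MatSetValuesBlocked_MPIBAIJ(), and then
   assembles the local A and B pieces.  Computation may be overlapped with
   the communication between the two calls.
*/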
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE, and restore the
       original flags after MatSetValuesBlocked() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get the largest number of rows any process has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute the number of nonzero blocks in each block row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices.  Include indices for every row, not just one per
     block row, since process 0 does not have the information needed to expand a block
     row into individual rows.  This requires more communication, but still no more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
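
/*
   Usage sketch (hypothetical caller code, not part of this file): the scatter of the
   ghost entries of xx is started before the diagonal-block product and completed only
   when the off-diagonal product needs them, overlapping communication with computation.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);         computes y = a->A*x_local + a->B*x_ghost
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/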
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* the values were actually received in the Begin() but we still need to call this no-op End() */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted in zz until the next line, which is true for this implementation */
1387   /* but is perhaps not always true. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt     max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) { max = tmp; }
1447     }
1448     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap  = mat->garray;
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart)
1483               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1484             else break;
1485           }
1486           imark = i;
1487         }
1488         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1490       }
1491     } else {
1492       if (idx) *idx = 0;
1493       if (v)   *v   = 0;
1494     }
1495   }
1496   *nz = nztot;
1497   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
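
/*
   Usage sketch (hypothetical): MatGetRow() on an MPIBAIJ matrix merges the diagonal (A)
   and off-diagonal (B) parts of a locally owned row and returns global column indices in
   increasing order; each call must be paired with MatRestoreRow().

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols[], vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/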
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size     = (PetscReal)matin->rmap->bs;
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1542   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1543   isend[3] += info->memory;  isend[4] += info->mallocs;
1544   if (flag == MAT_LOCAL) {
1545     info->nz_used      = isend[0];
1546     info->nz_allocated = isend[1];
1547     info->nz_unneeded  = isend[2];
1548     info->memory       = isend[3];
1549     info->mallocs      = isend[4];
1550   } else if (flag == MAT_GLOBAL_MAX) {
1551     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1552     info->nz_used      = irecv[0];
1553     info->nz_allocated = irecv[1];
1554     info->nz_unneeded  = irecv[2];
1555     info->memory       = irecv[3];
1556     info->mallocs      = irecv[4];
1557   } else if (flag == MAT_GLOBAL_SUM) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1566   info->fill_ratio_needed = 0;
1567   info->factor_mallocs    = 0;
1568   PetscFunctionReturn(0);
1569 }
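
/*
   Usage sketch (hypothetical): MAT_LOCAL returns this process's counts, while
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce the five statistics over the communicator.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/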
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1574 {
1575   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   switch (op) {
1580   case MAT_NEW_NONZERO_LOCATIONS:
1581   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1582   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583   case MAT_KEEP_NONZERO_PATTERN:
1584   case MAT_NEW_NONZERO_LOCATION_ERR:
1585     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1586     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1587     break;
1588   case MAT_ROW_ORIENTED:
1589     a->roworiented = flg;
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_NEW_DIAGONALS:
1594     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1595     break;
1596   case MAT_IGNORE_OFF_PROC_ENTRIES:
1597     a->donotstash = flg;
1598     break;
1599   case MAT_USE_HASH_TABLE:
1600     a->ht_flag = flg;
1601     break;
1602   case MAT_SYMMETRIC:
1603   case MAT_STRUCTURALLY_SYMMETRIC:
1604   case MAT_HERMITIAN:
1605   case MAT_SYMMETRY_ETERNAL:
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     break;
1608   default:
1609     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1610   }
1611   PetscFunctionReturn(0);
1612 }
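
/*
   Usage sketch (hypothetical): structural options are forwarded to both sequential
   blocks, while MAT_USE_HASH_TABLE and MAT_IGNORE_OFF_PROC_ENTRIES only set flags
   consulted later during MatSetValues() and assembly.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/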
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1617 {
1618   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1619   Mat_SeqBAIJ    *Aloc;
1620   Mat            B;
1621   PetscErrorCode ierr;
1622   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1623   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1624   MatScalar      *a;
1625 
1626   PetscFunctionBegin;
1627   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1628   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1629     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1630     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1631     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1632     /* Do not know preallocation information, but must set block size */
1633     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1634   } else {
1635     B = *matout;
1636   }
1637 
1638   /* copy over the A part */
1639   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1640   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1641   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1642 
1643   for (i=0; i<mbs; i++) {
1644     rvals[0] = bs*(baij->rstartbs + i);
1645     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1646     for (j=ai[i]; j<ai[i+1]; j++) {
1647       col = (baij->cstartbs+aj[j])*bs;
1648       for (k=0; k<bs; k++) {
1649         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1650         col++; a += bs;
1651       }
1652     }
1653   }
1654   /* copy over the B part */
1655   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1656   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1657   for (i=0; i<mbs; i++) {
1658     rvals[0] = bs*(baij->rstartbs + i);
1659     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1660     for (j=ai[i]; j<ai[i+1]; j++) {
1661       col = baij->garray[aj[j]]*bs;
1662       for (k=0; k<bs; k++) {
1663         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   ierr = PetscFree(rvals);CHKERRQ(ierr);
1669   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671 
1672   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1673     *matout = B;
1674   } else {
1675     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
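
/*
   Usage sketch (hypothetical): out-of-place transposition always works; in-place
   transposition (passing the matrix itself back) is supported only for square matrices.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       in place, square only
*/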
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1682 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1683 {
1684   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1685   Mat            a = baij->A,b = baij->B;
1686   PetscErrorCode ierr;
1687   PetscInt       s1,s2,s3;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1691   if (rr) {
1692     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1693     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1694     /* Overlap communication with computation. */
1695     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696   }
1697   if (ll) {
1698     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1699     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1700     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1701   }
1702   /* scale  the diagonal block */
1703   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1704 
1705   if (rr) {
1706     /* Do a scatter end and then right scale the off-diagonal block */
1707     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1709   }
1710 
1711   PetscFunctionReturn(0);
1712 }
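
/*
   Usage sketch (hypothetical): computes A = diag(ll)*A*diag(rr), where ll is distributed
   like the rows of A and rr like its columns; the scatter of rr into lvec is overlapped
   with the left scaling of the off-diagonal block.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/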
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1716 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1717 {
1718   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1719   PetscErrorCode    ierr;
1720   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1721   PetscInt          i,*owners = A->rmap->range;
1722   PetscInt          *nprocs,j,idx,nsends,row;
1723   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1724   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1725   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1726   MPI_Comm          comm = ((PetscObject)A)->comm;
1727   MPI_Request       *send_waits,*recv_waits;
1728   MPI_Status        recv_status,*send_status;
1729   const PetscScalar *xx;
1730   PetscScalar       *bb;
1731 #if defined(PETSC_DEBUG)
1732   PetscBool         found = PETSC_FALSE;
1733 #endif
1734 
1735   PetscFunctionBegin;
1736   /*  first count number of contributors to each processor */
1737   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1738   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1739   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1740   j = 0;
1741   for (i=0; i<N; i++) {
1742     if (lastidx > (idx = rows[i])) j = 0;
1743     lastidx = idx;
1744     for (; j<size; j++) {
1745       if (idx >= owners[j] && idx < owners[j+1]) {
1746         nprocs[2*j]++;
1747         nprocs[2*j+1] = 1;
1748         owner[i] = j;
1749 #if defined(PETSC_DEBUG)
1750         found = PETSC_TRUE;
1751 #endif
1752         break;
1753       }
1754     }
1755 #if defined(PETSC_DEBUG)
1756     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1757     found = PETSC_FALSE;
1758 #endif
1759   }
1760   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1761 
1762   if (A->nooffproczerorows) {
1763     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1764     nrecvs = nsends;
1765     nmax   = N;
1766   } else {
1767     /* inform other processors of number of messages and max length*/
1768     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1769   }
1770 
1771   /* post receives:   */
1772   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1773   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1774   for (i=0; i<nrecvs; i++) {
1775     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1776   }
1777 
1778   /* do sends:
1779      1) starts[i] gives the starting index in svalues for stuff going to
1780      the ith processor
1781   */
1782   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1783   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1784   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1785   starts[0]  = 0;
1786   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1787   for (i=0; i<N; i++) {
1788     svalues[starts[owner[i]]++] = rows[i];
1789   }
1790 
1791   starts[0] = 0;
1792   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1793   count = 0;
1794   for (i=0; i<size; i++) {
1795     if (nprocs[2*i+1]) {
1796       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1797     }
1798   }
1799   ierr = PetscFree(starts);CHKERRQ(ierr);
1800 
1801   base = owners[rank];
1802 
1803   /*  wait on receives */
1804   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1805   count  = nrecvs;
1806   slen = 0;
1807   while (count) {
1808     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1809     /* unpack receives into our local space */
1810     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1811     source[imdex]  = recv_status.MPI_SOURCE;
1812     lens[imdex]    = n;
1813     slen          += n;
1814     count--;
1815   }
1816   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1817 
1818   /* move the data into the send scatter */
1819   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1820   count = 0;
1821   for (i=0; i<nrecvs; i++) {
1822     values = rvalues + i*nmax;
1823     for (j=0; j<lens[i]; j++) {
1824       lrows[count++] = values[j] - base;
1825     }
1826   }
1827   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1828   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1829   ierr = PetscFree(owner);CHKERRQ(ierr);
1830   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1831 
1832   /* fix right hand side if needed */
1833   if (x && b) {
1834     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1835     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1836     for (i=0; i<slen; i++) {
1837       bb[lrows[i]] = diag*xx[lrows[i]];
1838     }
1839     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1840     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1841   }
1842 
1843   /* actually zap the local rows */
1844   /*
1845         Zero the required rows. If the "diagonal block" of the matrix
1846      is square and the user wishes to set the diagonal we use separate
1847      code so that MatSetValues() is not called for each diagonal allocating
1848      new memory, thus calling lots of mallocs and slowing things down.
1849 
1850   */
1851   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1852   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1853   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1854     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1855   } else if (diag != 0.0) {
1856     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1857     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1858        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1859     for (i=0; i<slen; i++) {
1860       row  = lrows[i] + rstart_bs;
1861       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1862     }
1863     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1864     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1865   } else {
1866     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1867   }
1868 
1869   ierr = PetscFree(lrows);CHKERRQ(ierr);
1870 
1871   /* wait on sends */
1872   if (nsends) {
1873     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1874     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1875     ierr = PetscFree(send_status);CHKERRQ(ierr);
1876   }
1877   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1878   ierr = PetscFree(svalues);CHKERRQ(ierr);
1879 
1880   PetscFunctionReturn(0);
1881 }
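
/*
   Usage sketch (hypothetical, e.g. imposing Dirichlet boundary conditions): rows may be
   listed on any process; they are routed to their owning processes before the local rows
   are zeroed. If x and b are given, b is fixed up so b_i = diag*x_i for each zeroed row.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/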
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1885 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1886 {
1887   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatEqual_MPIBAIJ"
1899 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1900 {
1901   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1902   Mat            a,b,c,d;
1903   PetscBool      flg;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   a = matA->A; b = matA->B;
1908   c = matB->A; d = matB->B;
1909 
1910   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1911   if (flg) {
1912     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1913   }
1914   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCopy_MPIBAIJ"
1920 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1921 {
1922   PetscErrorCode ierr;
1923   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1924   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1925 
1926   PetscFunctionBegin;
1927   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1928   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1929     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1930   } else {
1931     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1932     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1933   }
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1939 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1940 {
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1950 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1951 {
1952   PetscErrorCode ierr;
1953   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1954   PetscBLASInt   bnz,one=1;
1955   Mat_SeqBAIJ    *x,*y;
1956 
1957   PetscFunctionBegin;
1958   if (str == SAME_NONZERO_PATTERN) {
1959     PetscScalar alpha = a;
1960     x = (Mat_SeqBAIJ *)xx->A->data;
1961     y = (Mat_SeqBAIJ *)yy->A->data;
1962     bnz = PetscBLASIntCast(x->nz);
1963     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1964     x = (Mat_SeqBAIJ *)xx->B->data;
1965     y = (Mat_SeqBAIJ *)yy->B->data;
1966     bnz = PetscBLASIntCast(x->nz);
1967     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1968   } else {
1969     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1970   }
1971   PetscFunctionReturn(0);
1972 }
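
/*
   Usage sketch (hypothetical): with SAME_NONZERO_PATTERN the update Y = Y + a*X reduces
   to two BLAS axpy calls on the stored values of the diagonal and off-diagonal blocks;
   any other structure flag falls back to the general MatAXPY_Basic().

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/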
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1976 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1977 {
1978   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1979   PetscErrorCode ierr;
1980 
1981   PetscFunctionBegin;
1982   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1983   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1989 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1990 {
1991   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1996   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2002 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2003 {
2004   PetscErrorCode ierr;
2005   IS             iscol_local;
2006   PetscInt       csize;
2007 
2008   PetscFunctionBegin;
2009   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2010   if (call == MAT_REUSE_MATRIX) {
2011     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2012     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2013   } else {
2014     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2015   }
2016   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2017   if (call == MAT_INITIAL_MATRIX) {
2018     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2019     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2020   }
2021   PetscFunctionReturn(0);
2022 }
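
/*
   Usage sketch (hypothetical): each process supplies its share of the rows and columns;
   the column set is gathered onto every process with ISAllGather() and stashed on the
   new matrix so that a later MAT_REUSE_MATRIX call can find it.

     Mat S;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/
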
2023 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2026 /*
2027   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2028   locally, and then the end result by concatenating the local matrices.
2029   Writing it directly would look much like MatGetSubMatrices_MPIBAIJ().
2030 */
2031 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2032 {
2033   PetscErrorCode ierr;
2034   PetscMPIInt    rank,size;
2035   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2036   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2037   Mat            M,Mreuse;
2038   MatScalar      *vwork,*aa;
2039   MPI_Comm       comm = ((PetscObject)mat)->comm;
2040   IS             isrow_new, iscol_new;
2041   PetscBool      idflag,allrows, allcols;
2042   Mat_SeqBAIJ    *aij;
2043 
2044 
2045   PetscFunctionBegin;
2046   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2047   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2048   /* The compression and expansion should be avoided: they do not flag
2049      errors and might change the indices, hence buggy */
2050   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2051   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2052 
2053   /* Check for special case: each processor gets entire matrix columns */
2054   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2055   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2056   if (idflag && ncol == mat->cmap->N) {
2057     allcols = PETSC_TRUE;
2058   } else {
2059     allcols = PETSC_FALSE;
2060   }
2061 
2062   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2063   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2064   if (idflag && nrow == mat->rmap->N) {
2065     allrows = PETSC_TRUE;
2066   } else {
2067     allrows = PETSC_FALSE;
2068   }
2069   if (call ==  MAT_REUSE_MATRIX) {
2070     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2071     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2072     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2073   } else {
2074     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2075   }
2076   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2077   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2078   /*
2079       m - number of local rows
2080       n - number of columns (same on all processors)
2081       rstart - first row in new global matrix generated
2082   */
2083   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2084   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2085   m    = m/bs;
2086   n    = n/bs;
2087 
2088   if (call == MAT_INITIAL_MATRIX) {
2089     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2090     ii  = aij->i;
2091     jj  = aij->j;
2092 
2093     /*
2094         Determine the number of non-zeros in the diagonal and off-diagonal
2095         portions of the matrix in order to do correct preallocation
2096     */
2097 
2098     /* first get start and end of "diagonal" columns */
2099     if (csize == PETSC_DECIDE) {
2100       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2101       if (mglobal == n*bs) { /* square matrix */
2102         nlocal = m;
2103       } else {
2104         nlocal = n/size + ((n % size) > rank);
2105       }
2106     } else {
2107       nlocal = csize/bs;
2108     }
2109     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2110     rstart = rend - nlocal;
2111     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2112 
2113     /* next, compute all the lengths */
2114     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2115     olens = dlens + m;
2116     for (i=0; i<m; i++) {
2117       jend = ii[i+1] - ii[i];
2118       olen = 0;
2119       dlen = 0;
2120       for (j=0; j<jend; j++) {
2121         if (*jj < rstart || *jj >= rend) olen++;
2122         else dlen++;
2123         jj++;
2124       }
2125       olens[i] = olen;
2126       dlens[i] = dlen;
2127     }
2128     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2129     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2130     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2131     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2132     ierr = PetscFree(dlens);CHKERRQ(ierr);
2133   } else {
2134     PetscInt ml,nl;
2135 
2136     M = *newmat;
2137     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2138     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2139     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2140     /*
2141          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2142        rather than the slower MatSetValuesBlocked().
2143     */
2144     M->was_assembled = PETSC_TRUE;
2145     M->assembled     = PETSC_FALSE;
2146   }
2147   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2148   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2149   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2150   ii  = aij->i;
2151   jj  = aij->j;
2152   aa  = aij->a;
2153   for (i=0; i<m; i++) {
2154     row   = rstart/bs + i;
2155     nz    = ii[i+1] - ii[i];
2156     cwork = jj;     jj += nz;
2157     vwork = aa;     aa += nz*bs*bs;
2158     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2159   }
2160 
2161   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2163   *newmat = M;
2164 
2165   /* save submatrix used in processor for next request */
2166   if (call ==  MAT_INITIAL_MATRIX) {
2167     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2168     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2169   }
2170 
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatPermute_MPIBAIJ"
2176 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2177 {
2178   MPI_Comm       comm,pcomm;
2179   PetscInt       first,rlocal_size,clocal_size,nrows;
2180   const PetscInt *rows;
2181   PetscMPIInt    size;
2182   IS             crowp,growp,irowp,lrowp,lcolp;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2187   /* make a collective version of 'rowp' */
2188   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2189   if (pcomm==comm) {
2190     crowp = rowp;
2191   } else {
2192     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2193     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2194     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2195     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2196   }
2197   /* collect the global row permutation and invert it */
2198   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2199   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2200   if (pcomm!=comm) {
2201     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2202   }
2203   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2204   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2205   /* get the local target indices */
2206   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2207   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2208   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2209   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2210   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2211   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2212   /* the column permutation is much easier;
2213      make a local version of 'colp' */
2214   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2215   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2216   if (size==1) {
2217     lcolp = colp;
2218   } else {
2219     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2220   }
2221   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2222   /* now we just get the submatrix */
2223   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2224   if (size>1) {
2225     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2226   }
2227   /* clean up */
2228   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
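
/*
   Usage sketch (hypothetical): rowp and colp must be permutations of the row and column
   index sets; the row permutation is gathered, inverted, and the permuted matrix is then
   extracted with MatGetSubMatrix_MPIBAIJ_Private().

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/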
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2234 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2235 {
2236   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2237   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2238 
2239   PetscFunctionBegin;
2240   if (nghosts) { *nghosts = B->nbs;}
2241   if (ghosts) {*ghosts = baij->garray;}
2242   PetscFunctionReturn(0);
2243 }
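
/*
   Usage sketch (hypothetical): returns the global block indices of the off-process
   columns referenced by this process, in the order used by the local work vector lvec.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/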
2244 
2245 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2249 /*
2250     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2251 */
2252 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2253 {
2254   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2255   PetscErrorCode        ierr;
2256   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2257   PetscInt              bs,i,n,nrows,j,k,m,ncols,col;
2258   const PetscInt        *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2259   PetscInt              nis = iscoloring->n,nctot,*cols;
2260   PetscInt              *rowhit,M,cstart,cend,colb;
2261   PetscInt              *columnsforrow,l;
2262   IS                    *isa;
2263   PetscBool              done,flg;
2264   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2265   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2266 
2267   PetscFunctionBegin;
2268   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2269   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2270 
2271   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2272   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2273   M                = mat->rmap->n/bs;
2274   cstart           = mat->cmap->rstart/bs;
2275   cend             = mat->cmap->rend/bs;
2276   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2277   c->N             = mat->cmap->N/bs;
2278   c->m             = mat->rmap->n/bs;
2279   c->rstart        = mat->rmap->rstart/bs;
2280 
2281   c->ncolors       = nis;
2282   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2283   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2284   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2285   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2286   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2287   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2288 
2289   /* Allow access to data structures of local part of matrix */
2290   if (!baij->colmap) {
2291     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2292   }
2293   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2294   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2295 
2296   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2297   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2298 
2299   for (i=0; i<nis; i++) {
2300     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2301     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2302     c->ncolumns[i] = n;
2303     if (n) {
2304       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2305       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2306       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2307     } else {
2308       c->columns[i]  = 0;
2309     }
2310 
2311     if (ctype == IS_COLORING_GLOBAL) {
2312       /* Determine the total (parallel) number of columns of this color */
2313       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2314       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2315 
2316       nn   = PetscMPIIntCast(n);
2317       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2318       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2319       if (!nctot) {
2320         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2321       }
2322 
2323       disp[0] = 0;
2324       for (j=1; j<size; j++) {
2325         disp[j] = disp[j-1] + ncolsonproc[j-1];
2326       }
2327 
2328       /* Get complete list of columns for color on each processor */
2329       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2330       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2331       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2332     } else if (ctype == IS_COLORING_GHOSTED) {
2333       /* Determine local number of columns of this color on this process, including ghost points */
2334       nctot = n;
2335       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2336       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2337     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2338 
2339     /*
2340        Mark all rows affected by these columns
2341     */
2342     /* Temporary option to allow for debugging/testing */
2343     flg  = PETSC_FALSE;
2344     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2345     if (!flg) {/*-----------------------------------------------------------------------------*/
2346       /* crude, fast version */
2347       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2348       /* loop over columns*/
2349       for (j=0; j<nctot; j++) {
2350         if (ctype == IS_COLORING_GHOSTED) {
2351           col = ltog[cols[j]];
2352         } else {
2353           col  = cols[j];
2354         }
2355         if (col >= cstart && col < cend) {
2356           /* column is in diagonal block of matrix */
2357           rows = A_cj + A_ci[col-cstart];
2358           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2359         } else {
2360 #if defined (PETSC_USE_CTABLE)
2361           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2362           colb --;
2363 #else
2364           colb = baij->colmap[col] - 1;
2365 #endif
2366           if (colb == -1) {
2367             m = 0;
2368           } else {
2369             colb = colb/bs;
2370             rows = B_cj + B_ci[colb];
2371             m    = B_ci[colb+1] - B_ci[colb];
2372           }
2373         }
2374         /* loop over columns marking them in rowhit */
2375         for (k=0; k<m; k++) {
2376           rowhit[*rows++] = col + 1;
2377         }
2378       }
2379 
2380       /* count the number of hits */
2381       nrows = 0;
2382       for (j=0; j<M; j++) {
2383         if (rowhit[j]) nrows++;
2384       }
2385       c->nrows[i]         = nrows;
2386       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2387       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2388       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2389       nrows = 0;
2390       for (j=0; j<M; j++) {
2391         if (rowhit[j]) {
2392           c->rows[i][nrows]           = j;
2393           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2394           nrows++;
2395         }
2396       }
2397     } else {/*-------------------------------------------------------------------------------*/
2398       /* slow version, using rowhit as a linked list */
2399       PetscInt currentcol,fm,mfm;
2400       rowhit[M] = M;
2401       nrows     = 0;
2402       /* loop over columns*/
2403       for (j=0; j<nctot; j++) {
2404         if (ctype == IS_COLORING_GHOSTED) {
2405           col = ltog[cols[j]];
2406         } else {
2407           col  = cols[j];
2408         }
2409         if (col >= cstart && col < cend) {
2410           /* column is in diagonal block of matrix */
2411           rows = A_cj + A_ci[col-cstart];
2412           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2413         } else {
2414 #if defined (PETSC_USE_CTABLE)
2415           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2416           colb --;
2417 #else
2418           colb = baij->colmap[col] - 1;
2419 #endif
2420           if (colb == -1) {
2421             m = 0;
2422           } else {
2423             colb = colb/bs;
2424             rows = B_cj + B_ci[colb];
2425             m    = B_ci[colb+1] - B_ci[colb];
2426           }
2427         }
2428 
2429         /* loop over columns marking them in rowhit */
2430         fm    = M; /* fm points to first entry in linked list */
2431         for (k=0; k<m; k++) {
2432           currentcol = *rows++;
2433           /* is it already in the list? */
2434           do {
2435             mfm  = fm;
2436             fm   = rowhit[fm];
2437           } while (fm < currentcol);
2438           /* not in list so add it */
2439           if (fm != currentcol) {
2440             nrows++;
2441             columnsforrow[currentcol] = col;
2442             /* next three lines insert new entry into linked list */
2443             rowhit[mfm]               = currentcol;
2444             rowhit[currentcol]        = fm;
2445             fm                        = currentcol;
2446             /* fm points to present position in list since we know the columns are sorted */
2447           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2448         }
2449       }
2450       c->nrows[i]         = nrows;
2451       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2452       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2453       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2454       /* now store the linked list of rows into c->rows[i] */
2455       nrows = 0;
2456       fm    = rowhit[M];
2457       do {
2458         c->rows[i][nrows]            = fm;
2459         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2460         fm                           = rowhit[fm];
2461       } while (fm < M);
2462     } /* ---------------------------------------------------------------------------------------*/
2463     ierr = PetscFree(cols);CHKERRQ(ierr);
2464   }
2465 
2466   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2467   /*
2468        vscale will contain the "diagonal" on processor scalings followed by the off processor
2469   */
2470   if (ctype == IS_COLORING_GLOBAL) {
2471     PetscInt *garray;
2472     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2473     for (i=0; i<baij->B->cmap->n/bs; i++) {
2474       for (j=0; j<bs; j++) {
2475         garray[i*bs+j] = bs*baij->garray[i]+j;
2476       }
2477     }
2478     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2479     ierr = PetscFree(garray);CHKERRQ(ierr);
2480     CHKMEMQ;
2481     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2482     for (k=0; k<c->ncolors; k++) {
2483       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2484       for (l=0; l<c->nrows[k]; l++) {
2485         col = c->columnsforrow[k][l];
2486         if (col >= cstart && col < cend) {
2487           /* column is in diagonal block of matrix */
2488           colb = col - cstart;
2489         } else {
2490           /* column  is in "off-processor" part */
2491 #if defined (PETSC_USE_CTABLE)
2492           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2493           colb --;
2494 #else
2495           colb = baij->colmap[col] - 1;
2496 #endif
2497           colb = colb/bs;
2498           colb += cend - cstart;
2499         }
2500         c->vscaleforrow[k][l] = colb;
2501       }
2502     }
2503   } else if (ctype == IS_COLORING_GHOSTED) {
2504     /* Get gtol mapping */
2505     PetscInt N = mat->cmap->N, *gtol;
2506     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2507     for (i=0; i<N; i++) gtol[i] = -1;
2508     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2509 
2510     c->vscale = 0; /* will be created in MatFDColoringApply() */
2511     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2512     for (k=0; k<c->ncolors; k++) {
2513       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2514       for (l=0; l<c->nrows[k]; l++) {
2515         col = c->columnsforrow[k][l];      /* global column index */
2516         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2517       }
2518     }
2519     ierr = PetscFree(gtol);CHKERRQ(ierr);
2520   }
2521   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2522 
2523   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2524   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2525   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2526   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2527   CHKMEMQ;
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2533 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2534 {
2535   Mat            B;
2536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2537   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2538   Mat_SeqAIJ     *b;
2539   PetscErrorCode ierr;
2540   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2541   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2542   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2543 
2544   PetscFunctionBegin;
2545   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2546   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2547 
2548   /* ----------------------------------------------------------------
2549      Tell every processor the number of nonzeros per row
2550   */
2551   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2552   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2553     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2554   }
2555   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2556   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2557   displs     = recvcounts + size;
2558   for (i=0; i<size; i++) {
2559     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2560     displs[i]     = A->rmap->range[i]/bs;
2561   }
2562 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2563   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2564 #else
2565   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2566 #endif
2567   /* ---------------------------------------------------------------
2568      Create the sequential matrix of the same type as the local block diagonal
2569   */
2570   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2571   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2572   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2573   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2574   b = (Mat_SeqAIJ *)B->data;
2575 
2576   /*--------------------------------------------------------------------
2577     Copy my part of matrix column indices over
2578   */
2579   sendcount  = ad->nz + bd->nz;
2580   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2581   a_jsendbuf = ad->j;
2582   b_jsendbuf = bd->j;
2583   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2584   cnt        = 0;
2585   for (i=0; i<n; i++) {
2586 
2587     /* put in lower diagonal portion */
2588     m = bd->i[i+1] - bd->i[i];
2589     while (m > 0) {
2590       /* is it above diagonal (in bd (compressed) numbering) */
2591       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2592       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2593       m--;
2594     }
2595 
2596     /* put in diagonal portion */
2597     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2598       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2599     }
2600 
2601     /* put in upper diagonal portion */
2602     while (m-- > 0) {
2603       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2604     }
2605   }
2606   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2607 
2608   /*--------------------------------------------------------------------
2609     Gather all column indices to all processors
2610   */
2611   for (i=0; i<size; i++) {
2612     recvcounts[i] = 0;
2613     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2614       recvcounts[i] += lens[j];
2615     }
2616   }
2617   displs[0]  = 0;
2618   for (i=1; i<size; i++) {
2619     displs[i] = displs[i-1] + recvcounts[i-1];
2620   }
2621 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2622   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2623 #else
2624   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2625 #endif
2626   /*--------------------------------------------------------------------
2627     Assemble the matrix into usable form (note the numerical values are not yet set)
2628   */
2629   /* set the b->ilen (length of each row) values */
2630   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2631   /* set the b->i indices */
2632   b->i[0] = 0;
2633   for (i=1; i<=A->rmap->N/bs; i++) {
2634     b->i[i] = b->i[i-1] + lens[i-1];
2635   }
2636   ierr = PetscFree(lens);CHKERRQ(ierr);
2637   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2638   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2639   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2640 
2641   if (A->symmetric) {
2642     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2643   } else if (A->hermitian) {
2644     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2645   } else if (A->structurally_symmetric) {
2646     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2647   }
2648   *newmat = B;
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 #undef __FUNCT__
2653 #define __FUNCT__ "MatSOR_MPIBAIJ"
2654 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2655 {
2656   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2657   PetscErrorCode ierr;
2658   Vec            bb1 = 0;
2659 
2660   PetscFunctionBegin;
2661   if (flag == SOR_APPLY_UPPER) {
2662     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2663     PetscFunctionReturn(0);
2664   }
2665 
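  /* bb1 is needed when more than one iteration runs or the initial guess is nonzero;
     note ~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when that bit is unset */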
2666   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2667     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2668   }
2669 
2670   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2671     if (flag & SOR_ZERO_INITIAL_GUESS) {
2672       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2673       its--;
2674     }
2675 
2676     while (its--) {
2677       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2678       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2679 
2680       /* update rhs: bb1 = bb - B*x */
2681       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2682       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2683 
2684       /* local sweep */
2685       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2686     }
2687   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2688     if (flag & SOR_ZERO_INITIAL_GUESS) {
2689       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2690       its--;
2691     }
2692     while (its--) {
2693       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2694       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2695 
2696       /* update rhs: bb1 = bb - B*x */
2697       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2698       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2699 
2700       /* local sweep */
2701       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2702     }
2703   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2704     if (flag & SOR_ZERO_INITIAL_GUESS) {
2705       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2706       its--;
2707     }
2708     while (its--) {
2709       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2710       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2711 
2712       /* update rhs: bb1 = bb - B*x */
2713       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2714       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2715 
2716       /* local sweep */
2717       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2718     }
2719   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2720 
2721   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2729 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2730 {
2731   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 
2740 /* -------------------------------------------------------------------*/
2741 static struct _MatOps MatOps_Values = {
2742        MatSetValues_MPIBAIJ,
2743        MatGetRow_MPIBAIJ,
2744        MatRestoreRow_MPIBAIJ,
2745        MatMult_MPIBAIJ,
2746 /* 4*/ MatMultAdd_MPIBAIJ,
2747        MatMultTranspose_MPIBAIJ,
2748        MatMultTransposeAdd_MPIBAIJ,
2749        0,
2750        0,
2751        0,
2752 /*10*/ 0,
2753        0,
2754        0,
2755        MatSOR_MPIBAIJ,
2756        MatTranspose_MPIBAIJ,
2757 /*15*/ MatGetInfo_MPIBAIJ,
2758        MatEqual_MPIBAIJ,
2759        MatGetDiagonal_MPIBAIJ,
2760        MatDiagonalScale_MPIBAIJ,
2761        MatNorm_MPIBAIJ,
2762 /*20*/ MatAssemblyBegin_MPIBAIJ,
2763        MatAssemblyEnd_MPIBAIJ,
2764        MatSetOption_MPIBAIJ,
2765        MatZeroEntries_MPIBAIJ,
2766 /*24*/ MatZeroRows_MPIBAIJ,
2767        0,
2768        0,
2769        0,
2770        0,
2771 /*29*/ MatSetUp_MPIBAIJ,
2772        0,
2773        0,
2774        0,
2775        0,
2776 /*34*/ MatDuplicate_MPIBAIJ,
2777        0,
2778        0,
2779        0,
2780        0,
2781 /*39*/ MatAXPY_MPIBAIJ,
2782        MatGetSubMatrices_MPIBAIJ,
2783        MatIncreaseOverlap_MPIBAIJ,
2784        MatGetValues_MPIBAIJ,
2785        MatCopy_MPIBAIJ,
2786 /*44*/ 0,
2787        MatScale_MPIBAIJ,
2788        0,
2789        0,
2790        0,
2791 /*49*/ 0,
2792        0,
2793        0,
2794        0,
2795        0,
2796 /*54*/ MatFDColoringCreate_MPIBAIJ,
2797        0,
2798        MatSetUnfactored_MPIBAIJ,
2799        MatPermute_MPIBAIJ,
2800        MatSetValuesBlocked_MPIBAIJ,
2801 /*59*/ MatGetSubMatrix_MPIBAIJ,
2802        MatDestroy_MPIBAIJ,
2803        MatView_MPIBAIJ,
2804        0,
2805        0,
2806 /*64*/ 0,
2807        0,
2808        0,
2809        0,
2810        0,
2811 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2812        0,
2813        0,
2814        0,
2815        0,
2816 /*74*/ 0,
2817        MatFDColoringApply_BAIJ,
2818        0,
2819        0,
2820        0,
2821 /*79*/ 0,
2822        0,
2823        0,
2824        0,
2825        MatLoad_MPIBAIJ,
2826 /*84*/ 0,
2827        0,
2828        0,
2829        0,
2830        0,
2831 /*89*/ 0,
2832        0,
2833        0,
2834        0,
2835        0,
2836 /*94*/ 0,
2837        0,
2838        0,
2839        0,
2840        0,
2841 /*99*/ 0,
2842        0,
2843        0,
2844        0,
2845        0,
2846 /*104*/0,
2847        MatRealPart_MPIBAIJ,
2848        MatImaginaryPart_MPIBAIJ,
2849        0,
2850        0,
2851 /*109*/0,
2852        0,
2853        0,
2854        0,
2855        0,
2856 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2857        0,
2858        MatGetGhosts_MPIBAIJ,
2859        0,
2860        0,
2861 /*119*/0,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*124*/0,
2867        0,
2868        MatInvertBlockDiagonal_MPIBAIJ
2869 };
2870 
2871 EXTERN_C_BEGIN
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2874 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2875 {
2876   PetscFunctionBegin;
2877   *a = ((Mat_MPIBAIJ *)A->data)->A;
2878   PetscFunctionReturn(0);
2879 }
2880 EXTERN_C_END
2881 
2882 EXTERN_C_BEGIN
2883 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2884 EXTERN_C_END
2885 
2886 EXTERN_C_BEGIN
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2889 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2890 {
2891   PetscInt       m,rstart,cstart,cend;
2892   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2893   const PetscInt *JJ=0;
2894   PetscScalar    *values=0;
2895   PetscErrorCode ierr;
2896 
2897   PetscFunctionBegin;
2898   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2899   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2900   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2901   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2902   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2903   m      = B->rmap->n/bs;
2904   rstart = B->rmap->rstart/bs;
2905   cstart = B->cmap->rstart/bs;
2906   cend   = B->cmap->rend/bs;
2907 
2908   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2909   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2910   for (i=0; i<m; i++) {
2911     nz = ii[i+1] - ii[i];
2912     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2913     nz_max = PetscMax(nz_max,nz);
2914     JJ  = jj + ii[i];
2915     for (j=0; j<nz; j++) {
2916       if (*JJ >= cstart) break;
2917       JJ++;
2918     }
2919     d = 0;
2920     for (; j<nz; j++) {
2921       if (*JJ++ >= cend) break;
2922       d++;
2923     }
2924     d_nnz[i] = d;
2925     o_nnz[i] = nz - d;
2926   }
2927   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2928   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2929 
2930   values = (PetscScalar*)V;
2931   if (!values) {
2932     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2933     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2934   }
2935   for (i=0; i<m; i++) {
2936     PetscInt          row    = i + rstart;
2937     PetscInt          ncols  = ii[i+1] - ii[i];
2938     const PetscInt    *icols = jj + ii[i];
2939     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2940     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2941   }
2942 
2943   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2944   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 EXTERN_C_END
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2953 /*@C
2954    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2955    (block compressed row).
2956 
2957    Collective on MPI_Comm
2958 
2959    Input Parameters:
2960 +  B - the matrix
2961 .  bs - the block size
2962 .  i - the indices into j for the start of each local row (starts with zero)
2963 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2964 -  v - optional values in the matrix
2965 
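   Example:
   A minimal sketch (the indices and zero values are purely illustrative; A, comm and ierr
   are assumed to be declared elsewhere).  With bs=2 each entry of j names one 2x2 block
   and v supplies bs*bs values per block, laid out as MatSetValuesBlocked() expects:

.vb
   PetscInt    bs  = 2;
   PetscInt    i[] = {0,2,3};     /* block row 0 holds 2 blocks, block row 1 holds 1 */
   PetscInt    j[] = {0,1,1};     /* global block column of each block, sorted per row */
   PetscScalar v[3*2*2] = {0.0};  /* bs*bs values for each of the 3 blocks */

   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,v);CHKERRQ(ierr);
.ve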
2966    Level: developer
2967 
2968 .keywords: matrix, aij, compressed row, sparse, parallel
2969 
2970 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
2971 @*/
2972 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2973 {
2974   PetscErrorCode ierr;
2975 
2976   PetscFunctionBegin;
2977   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2978   PetscValidType(B,1);
2979   PetscValidLogicalCollectiveInt(B,bs,2);
2980   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 EXTERN_C_BEGIN
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2987 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2988 {
2989   Mat_MPIBAIJ    *b;
2990   PetscErrorCode ierr;
2991   PetscInt       i;
2992   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
2993 
2994   PetscFunctionBegin;
2995   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
2996   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
2997 
2998   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2999   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3000   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3001   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3002 
3003   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3004   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3005   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3006   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3007   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3008 
3009   if (d_nnz) {
3010     for (i=0; i<B->rmap->n/bs; i++) {
3011       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3012     }
3013   }
3014   if (o_nnz) {
3015     for (i=0; i<B->rmap->n/bs; i++) {
3016       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3017     }
3018   }
3019 
3020   b = (Mat_MPIBAIJ*)B->data;
3021   b->bs2 = bs*bs;
3022   b->mbs = B->rmap->n/bs;
3023   b->nbs = B->cmap->n/bs;
3024   b->Mbs = B->rmap->N/bs;
3025   b->Nbs = B->cmap->N/bs;
3026 
3027   for (i=0; i<=b->size; i++) {
3028     b->rangebs[i] = B->rmap->range[i]/bs;
3029   }
3030   b->rstartbs = B->rmap->rstart/bs;
3031   b->rendbs   = B->rmap->rend/bs;
3032   b->cstartbs = B->cmap->rstart/bs;
3033   b->cendbs   = B->cmap->rend/bs;
3034 
3035   if (!B->preallocated) {
3036     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3037     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3038     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3039     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3040     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3041     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3042     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3043     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3044     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3045   }
3046 
3047   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3048   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3049   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3050   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3051   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3052   B->preallocated = PETSC_TRUE;
3053   PetscFunctionReturn(0);
3054 }
3055 EXTERN_C_END
3056 
3057 EXTERN_C_BEGIN
3058 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3059 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3060 EXTERN_C_END
3061 
3062 
3063 EXTERN_C_BEGIN
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3066 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3067 {
3068   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3069   PetscErrorCode ierr;
3070   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3071   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3072   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3073 
3074   PetscFunctionBegin;
3075   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3076   ii[0] = 0;
3077   CHKMEMQ;
3078   for (i=0; i<M; i++) {
3079     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3080     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3081     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3082     /* remove one from the count if this row of the matrix has a diagonal entry */
3083     for (j=id[i]; j<id[i+1]; j++) {
3084       if (jd[j] == i) {ii[i+1]--;break;}
3085     }
3086     CHKMEMQ;
3087   }
3088   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3089   cnt = 0;
3090   for (i=0; i<M; i++) {
3091     for (j=io[i]; j<io[i+1]; j++) {
3092       if (garray[jo[j]] > rstart) break;
3093       jj[cnt++] = garray[jo[j]];
3094       CHKMEMQ;
3095     }
3096     for (k=id[i]; k<id[i+1]; k++) {
3097       if (jd[k] != i) {
3098         jj[cnt++] = rstart + jd[k];
3099         CHKMEMQ;
3100       }
3101     }
3102     for (;j<io[i+1]; j++) {
3103       jj[cnt++] = garray[jo[j]];
3104       CHKMEMQ;
3105     }
3106   }
3107   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3108   PetscFunctionReturn(0);
3109 }
3110 EXTERN_C_END
3111 
3112 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3113 EXTERN_C_BEGIN
3114 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3115 EXTERN_C_END
3116 
3117 EXTERN_C_BEGIN
3118 #undef __FUNCT__
3119 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3120 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3121 {
3122   PetscErrorCode ierr;
3123   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3124   Mat            B;
3125   Mat_MPIAIJ     *b;
3126 
3127   PetscFunctionBegin;
3128   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3129 
3130   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3131   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3132   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3133   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3134   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3135   b = (Mat_MPIAIJ*) B->data;
3136 
3137   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3138   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
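  /* disassemble so the off-diagonal part is stored with global column numbering before the per-block conversions below */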
3139   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3140   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3141   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3142   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3144   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3145   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3146   if (reuse == MAT_REUSE_MATRIX) {
3147     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3148   } else {
3149    *newmat = B;
3150   }
3151   PetscFunctionReturn(0);
3152 }
3153 EXTERN_C_END
3154 
3155 EXTERN_C_BEGIN
3156 #if defined(PETSC_HAVE_MUMPS)
3157 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3158 #endif
3159 EXTERN_C_END
3160 
3161 /*MC
3162    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3163 
3164    Options Database Keys:
3165 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3166 . -mat_block_size <bs> - set the blocksize used to store the matrix
3167 - -mat_use_hash_table <fact> - use a hash table during matrix assembly and set its size factor
3168 
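   Example:
   A sketch of selecting this type at run time (A, comm, ierr and the sizes m, n, M, N
   are assumed to be declared elsewhere):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   /* run with:  -mat_type mpibaij -mat_block_size 3 */
.ve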
3169   Level: beginner
3170 
3171 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ
3172 M*/
3173 
3174 EXTERN_C_BEGIN
3175 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3176 EXTERN_C_END
3177 
3178 EXTERN_C_BEGIN
3179 #undef __FUNCT__
3180 #define __FUNCT__ "MatCreate_MPIBAIJ"
3181 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3182 {
3183   Mat_MPIBAIJ    *b;
3184   PetscErrorCode ierr;
3185   PetscBool      flg;
3186 
3187   PetscFunctionBegin;
3188   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3189   B->data = (void*)b;
3190 
3191   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3192   B->assembled  = PETSC_FALSE;
3193 
3194   B->insertmode = NOT_SET_VALUES;
3195   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3196   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3197 
3198   /* build local table of row and column ownerships */
3199   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3200 
3201   /* build cache for off-process entries formed during assembly */
3202   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3203   b->donotstash  = PETSC_FALSE;
3204   b->colmap      = PETSC_NULL;
3205   b->garray      = PETSC_NULL;
3206   b->roworiented = PETSC_TRUE;
3207 
3208   /* stuff used in block assembly */
3209   b->barray       = 0;
3210 
3211   /* stuff used for matrix vector multiply */
3212   b->lvec         = 0;
3213   b->Mvctx        = 0;
3214 
3215   /* stuff for MatGetRow() */
3216   b->rowindices   = 0;
3217   b->rowvalues    = 0;
3218   b->getrowactive = PETSC_FALSE;
3219 
3220   /* hash table stuff */
3221   b->ht           = 0;
3222   b->hd           = 0;
3223   b->ht_size      = 0;
3224   b->ht_flag      = PETSC_FALSE;
3225   b->ht_fact      = 0;
3226   b->ht_total_ct  = 0;
3227   b->ht_insert_ct = 0;
3228 
3229   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3230   b->ijonly       = PETSC_FALSE;
3231 
3232   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3233     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3234     if (flg) {
3235       PetscReal fact = 1.39;
3236       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3237       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3238       if (fact <= 1.0) fact = 1.39;
3239       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3240       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3241     }
3242   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3243 
3244 #if defined(PETSC_HAVE_MUMPS)
3245   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3246 #endif
3247   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3248                                      "MatConvert_MPIBAIJ_MPIAdj",
3249                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3250   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3251                                      "MatConvert_MPIBAIJ_MPIAIJ",
3252                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3253   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3254                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3255                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3256   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3257                                      "MatStoreValues_MPIBAIJ",
3258                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3259   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3260                                      "MatRetrieveValues_MPIBAIJ",
3261                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3262   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3263                                      "MatGetDiagonalBlock_MPIBAIJ",
3264                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3265   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3266                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3267                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3268   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3269                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3270                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3272                                      "MatDiagonalScaleLocal_MPIBAIJ",
3273                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3275                                      "MatSetHashTableFactor_MPIBAIJ",
3276                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3278                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3279                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3280   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3281   PetscFunctionReturn(0);
3282 }
3283 EXTERN_C_END
3284 
3285 /*MC
3286    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3287 
3288    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3289    and MATMPIBAIJ otherwise.
3290 
3291    Options Database Keys:
3292 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3293 
3294   Level: beginner
3295 
3296 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3297 M*/
3298 
3299 #undef __FUNCT__
3300 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3301 /*@C
3302    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3303    (block compressed row).  For good matrix assembly performance
3304    the user should preallocate the matrix storage by setting the parameters
3305    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3306    performance can be increased by more than a factor of 50.
3307 
3308    Collective on Mat
3309 
3310    Input Parameters:
3311 +  B - the matrix
3312 .  bs   - size of block
3313 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3314            submatrix  (same for all local rows)
3315 .  d_nnz - array containing the number of block nonzeros in the various block rows
3316            of the diagonal portion of the local submatrix (possibly different for each block
3317            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3318            set it even if it is zero.
3319 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3320            submatrix (same for all local rows).
3321 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3322            off-diagonal portion of the local submatrix (possibly different for
3323            each block row) or PETSC_NULL.
3324 
3325    If the *_nnz parameter is given then the *_nz parameter is ignored
3326 
3327    Options Database Keys:
3328 +   -mat_block_size - size of the blocks to use
3329 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly and set its size factor
3330 
3331    Notes:
3332    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3333    than it must be used on all processors that share the object for that argument.
3334 
3335    Storage Information:
3336    For a square global matrix we define each processor's diagonal portion
3337    to be its local rows and the corresponding columns (a square submatrix);
3338    each processor's off-diagonal portion encompasses the remainder of the
3339    local matrix (a rectangular submatrix).
3340 
3341    The user can specify preallocated storage for the diagonal part of
3342    the local submatrix with either d_nz or d_nnz (not both).  Set
3343    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3344    memory allocation.  Likewise, specify preallocated storage for the
3345    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3346 
3347    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3348    the figure below we depict these three local rows and all columns (0-11).
3349 
3350 .vb
3351            0 1 2 3 4 5 6 7 8 9 10 11
3352           -------------------
3353    row 3  |  o o o d d d o o o o o o
3354    row 4  |  o o o d d d o o o o o o
3355    row 5  |  o o o d d d o o o o o o
3356           -------------------
3357 .ve
3358 
3359    Thus, any entries in the d locations are stored in the d (diagonal)
3360    submatrix, and any entries in the o locations are stored in the
3361    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3362    stored simply in the MATSEQBAIJ format for compressed row storage.
3363 
3364    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3365    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3366    In general, for PDE problems in which most nonzeros are near the diagonal,
3367    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3368    or you will get TERRIBLE performance; see the users' manual chapter on
3369    matrices.
3370 
3371    You can call MatGetInfo() to get information on how effective the preallocation was;
3372    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3373    You can also run with the option -info and look for messages with the string
3374    malloc in them to see if additional memory allocation was needed.
3375 
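   Example:
   For the layout depicted above, take bs=1 so that each d/o location is a 1x1 block:
   the owning process has 3 diagonal-portion blocks and 9 off-diagonal blocks in every
   block row, so (as a sketch) it could preallocate with

.vb
   ierr = MatMPIBAIJSetPreallocation(A,1,3,PETSC_NULL,9,PETSC_NULL);CHKERRQ(ierr);
.ve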
3376    Level: intermediate
3377 
3378 .keywords: matrix, block, aij, compressed row, sparse, parallel
3379 
3380 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3381 @*/
3382 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3383 {
3384   PetscErrorCode ierr;
3385 
3386   PetscFunctionBegin;
3387   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3388   PetscValidType(B,1);
3389   PetscValidLogicalCollectiveInt(B,bs,2);
3390   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 #undef __FUNCT__
3395 #define __FUNCT__ "MatCreateBAIJ"
3396 /*@C
3397    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3398    (block compressed row).  For good matrix assembly performance
3399    the user should preallocate the matrix storage by setting the parameters
3400    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3401    performance can be increased by more than a factor of 50.
3402 
3403    Collective on MPI_Comm
3404 
3405    Input Parameters:
3406 +  comm - MPI communicator
3407 .  bs   - size of block
3408 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3409            This value should be the same as the local size used in creating the
3410            y vector for the matrix-vector product y = Ax.
3411 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3412            This value should be the same as the local size used in creating the
3413            x vector for the matrix-vector product y = Ax.
3414 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3415 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3416 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3417            submatrix  (same for all local rows)
3418 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3419            of the diagonal portion of the local submatrix (possibly different for each block
3420            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3421            and set it even if it is zero.
3422 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3423            submatrix (same for all local rows).
3424 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3425            off-diagonal portion of the local submatrix (possibly different for
3426            each block row) or PETSC_NULL.
3427 
3428    Output Parameter:
3429 .  A - the matrix
3430 
3431    Options Database Keys:
3432 +   -mat_block_size - size of the blocks to use
3433 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly and set its size factor
3434 
3435    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3436    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3437    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3438 
3439    Notes:
3440    If the *_nnz parameter is given then the *_nz parameter is ignored
3441 
3442    A nonzero block is any block that has 1 or more nonzeros in it
3443 
3444    The user MUST specify either the local or global matrix dimensions
3445    (possibly both).
3446 
3447    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3448    then it must be used on all processors that share the object for that argument.
3449 
3450    Storage Information:
3451    For a square global matrix we define each processor's diagonal portion
3452    to be its local rows and the corresponding columns (a square submatrix);
3453    each processor's off-diagonal portion encompasses the remainder of the
3454    local matrix (a rectangular submatrix).
3455 
3456    The user can specify preallocated storage for the diagonal part of
3457    the local submatrix with either d_nz or d_nnz (not both).  Set
3458    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3459    memory allocation.  Likewise, specify preallocated storage for the
3460    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3461 
3462    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3463    the figure below we depict these three local rows and all columns (0-11).
3464 
3465 .vb
3466            0 1 2 3 4 5 6 7 8 9 10 11
3467           -------------------
3468    row 3  |  o o o d d d o o o o o o
3469    row 4  |  o o o d d d o o o o o o
3470    row 5  |  o o o d d d o o o o o o
3471           -------------------
3472 .ve
3473 
3474    Thus, any entries in the d locations are stored in the d (diagonal)
3475    submatrix, and any entries in the o locations are stored in the
3476    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3477    stored simply in the MATSEQBAIJ format for compressed row storage.
3478 
3479    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3480    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3481    In general, for PDE problems in which most nonzeros are near the diagonal,
3482    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3483    or you will get TERRIBLE performance; see the users' manual chapter on
3484    matrices.
3485 
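   Example:
   A minimal sketch with 3x3 blocks and 100 block rows per process; the preallocation
   counts 5 and 2 are illustrative, not tuned for any particular problem:

.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,3,100*3,100*3,PETSC_DETERMINE,PETSC_DETERMINE,
                        5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve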
3486    Level: intermediate
3487 
3488 .keywords: matrix, block, aij, compressed row, sparse, parallel
3489 
3490 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3491 @*/
3492 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3493 {
3494   PetscErrorCode ierr;
3495   PetscMPIInt    size;
3496 
3497   PetscFunctionBegin;
3498   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3499   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3500   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3501   if (size > 1) {
3502     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3503     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3504   } else {
3505     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3506     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3507   }
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 #undef __FUNCT__
3512 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3513 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3514 {
3515   Mat            mat;
3516   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3517   PetscErrorCode ierr;
3518   PetscInt       len=0;
3519 
3520   PetscFunctionBegin;
3521   *newmat       = 0;
3522   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3523   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3524   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3525   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3526 
3527   mat->factortype   = matin->factortype;
3528   mat->preallocated = PETSC_TRUE;
3529   mat->assembled    = PETSC_TRUE;
3530   mat->insertmode   = NOT_SET_VALUES;
3531 
3532   a      = (Mat_MPIBAIJ*)mat->data;
3533   mat->rmap->bs  = matin->rmap->bs;
3534   a->bs2   = oldmat->bs2;
3535   a->mbs   = oldmat->mbs;
3536   a->nbs   = oldmat->nbs;
3537   a->Mbs   = oldmat->Mbs;
3538   a->Nbs   = oldmat->Nbs;
3539 
3540   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3541   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3542 
3543   a->size         = oldmat->size;
3544   a->rank         = oldmat->rank;
3545   a->donotstash   = oldmat->donotstash;
3546   a->roworiented  = oldmat->roworiented;
3547   a->rowindices   = 0;
3548   a->rowvalues    = 0;
3549   a->getrowactive = PETSC_FALSE;
3550   a->barray       = 0;
3551   a->rstartbs     = oldmat->rstartbs;
3552   a->rendbs       = oldmat->rendbs;
3553   a->cstartbs     = oldmat->cstartbs;
3554   a->cendbs       = oldmat->cendbs;
3555 
3556   /* hash table stuff */
3557   a->ht           = 0;
3558   a->hd           = 0;
3559   a->ht_size      = 0;
3560   a->ht_flag      = oldmat->ht_flag;
3561   a->ht_fact      = oldmat->ht_fact;
3562   a->ht_total_ct  = 0;
3563   a->ht_insert_ct = 0;
3564 
3565   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3566   if (oldmat->colmap) {
3567 #if defined (PETSC_USE_CTABLE)
3568   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3569 #else
3570   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3571   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3572   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3573 #endif
3574   } else a->colmap = 0;
3575 
3576   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3577     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3578     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3579     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3580   } else a->garray = 0;
3581 
3582   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3583   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3584   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3585   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3586   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3587 
3588   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3589   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3590   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3591   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3592   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3593   *newmat = mat;
3594 
3595   PetscFunctionReturn(0);
3596 }
3597 
3598 #undef __FUNCT__
3599 #define __FUNCT__ "MatLoad_MPIBAIJ"
3600 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3601 {
3602   PetscErrorCode ierr;
3603   int            fd;
3604   PetscInt       i,nz,j,rstart,rend;
3605   PetscScalar    *vals,*buf;
3606   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3607   MPI_Status     status;
3608   PetscMPIInt    rank,size,maxnz;
3609   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3610   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3611   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3612   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3613   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3614   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3615 
3616   PetscFunctionBegin;
3617   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3618     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3619   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3620 
3621   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3622   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3623   if (!rank) {
3624     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3625     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3626     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3627   }
3628 
3629   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3630 
3631   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3632   M = header[1]; N = header[2];
3633 
3634   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3635   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3636   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3637 
3638   /* If global sizes are set, check if they are consistent with that given in the file */
3639   if (sizesset) {
3640     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3641   }
3642   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3643   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3644 
3645   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3646 
3647   /*
3648      This code adds extra rows to make sure the number of rows is
3649      divisible by the blocksize
3650   */
3651   Mbs        = M/bs;
3652   extra_rows = bs - M + bs*Mbs;
3653   if (extra_rows == bs) extra_rows = 0;
3654   else                  Mbs++;
3655   if (extra_rows && !rank) {
3656     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3657   }
3658 
3659   /* determine ownership of all rows */
3660   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3661     mbs        = Mbs/size + ((Mbs % size) > rank);
3662     m          = mbs*bs;
3663   } else { /* User set */
3664     m          = newmat->rmap->n;
3665     mbs        = m/bs;
3666   }
3667   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3668   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3669 
3670   /* process 0 needs enough room for process with most rows */
3671   if (!rank) {
3672     mmax = rowners[1];
3673     for (i=2; i<=size; i++) {
3674       mmax = PetscMax(mmax,rowners[i]);
3675     }
3676     mmax*=bs;
3677   } else mmax = m;
3678 
3679   rowners[0] = 0;
3680   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3681   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3682   rstart = rowners[rank];
3683   rend   = rowners[rank+1];
3684 
3685   /* distribute row lengths to all processors */
3686   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3687   if (!rank) {
3688     mend = m;
3689     if (size == 1) mend = mend - extra_rows;
3690     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3691     for (j=mend; j<m; j++) locrowlens[j] = 1;
3692     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3693     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3694     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3695     for (j=0; j<m; j++) {
3696       procsnz[0] += locrowlens[j];
3697     }
3698     for (i=1; i<size; i++) {
3699       mend = browners[i+1] - browners[i];
3700       if (i == size-1) mend = mend - extra_rows;
3701       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3702       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3703       /* calculate the number of nonzeros on each processor */
3704       for (j=0; j<browners[i+1]-browners[i]; j++) {
3705         procsnz[i] += rowlengths[j];
3706       }
3707       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3708     }
3709     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3710   } else {
3711     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3712   }
3713 
3714   if (!rank) {
3715     /* determine max buffer needed and allocate it */
3716     maxnz = procsnz[0];
3717     for (i=1; i<size; i++) {
3718       maxnz = PetscMax(maxnz,procsnz[i]);
3719     }
3720     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3721 
3722     /* read in my part of the matrix column indices  */
3723     nz     = procsnz[0];
3724     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3725     mycols = ibuf;
3726     if (size == 1)  nz -= extra_rows;
3727     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3728     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3729 
3730     /* read in the other processes' column indices (except the last) and ship them off */
3731     for (i=1; i<size-1; i++) {
3732       nz   = procsnz[i];
3733       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3734       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3735     }
3736     /* read in the stuff for the last proc */
3737     if (size != 1) {
3738       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3739       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3740       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3741       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3742     }
3743     ierr = PetscFree(cols);CHKERRQ(ierr);
3744   } else {
3745     /* determine buffer space needed for message */
3746     nz = 0;
3747     for (i=0; i<m; i++) {
3748       nz += locrowlens[i];
3749     }
3750     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3751     mycols = ibuf;
3752     /* receive message of column indices */
3753     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3754     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3755     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3756   }
3757 
3758   /* loop over local rows, determining number of off diagonal entries */
3759   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3760   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3761   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3762   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3763   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
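  /* mask[] marks block columns already seen in the current block row; block columns in
     [rstart,rend) belong to the diagonal portion (collected in masked1), all others to
     the off-diagonal portion (collected in masked2) */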
3764   rowcount = 0; nzcount = 0;
3765   for (i=0; i<mbs; i++) {
3766     dcount  = 0;
3767     odcount = 0;
3768     for (j=0; j<bs; j++) {
3769       kmax = locrowlens[rowcount];
3770       for (k=0; k<kmax; k++) {
3771         tmp = mycols[nzcount++]/bs;
3772         if (!mask[tmp]) {
3773           mask[tmp] = 1;
3774           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3775           else masked1[dcount++] = tmp;
3776         }
3777       }
3778       rowcount++;
3779     }
3780 
3781     dlens[i]  = dcount;
3782     odlens[i] = odcount;
3783 
3784     /* zero out the mask elements we set */
3785     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3786     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3787   }
3788 
3789 
3790   if (!sizesset) {
3791     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3792   }
3793   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3794 
3795   if (!rank) {
3796     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3797     /* read in my part of the matrix numerical values  */
3798     nz = procsnz[0];
3799     vals = buf;
3800     mycols = ibuf;
3801     if (size == 1)  nz -= extra_rows;
3802     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3803     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3804 
3805     /* insert into matrix */
3806     jj      = rstart*bs;
3807     for (i=0; i<m; i++) {
3808       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3809       mycols += locrowlens[i];
3810       vals   += locrowlens[i];
3811       jj++;
3812     }
3813     /* read in the other processes' values (except the last one) and ship them out */
3814     for (i=1; i<size-1; i++) {
3815       nz   = procsnz[i];
3816       vals = buf;
3817       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3818       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3819     }
3820     /* the last proc */
3821     if (size != 1) {
3822       nz   = procsnz[i] - extra_rows;
3823       vals = buf;
3824       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3825       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3826       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3827     }
3828     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3829   } else {
3830     /* receive numeric values */
3831     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3832 
3833     /* receive message of values */
3834     vals   = buf;
3835     mycols = ibuf;
3836     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3837 
3838     /* insert into matrix */
3839     jj      = rstart*bs;
3840     for (i=0; i<m; i++) {
3841       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3842       mycols += locrowlens[i];
3843       vals   += locrowlens[i];
3844       jj++;
3845     }
3846   }
3847   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3848   ierr = PetscFree(buf);CHKERRQ(ierr);
3849   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3850   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3851   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3852   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3853   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3854   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3855 
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 #undef __FUNCT__
3860 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3861 /*@
3862    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
3863 
3864    Input Parameters:
3865 .  mat  - the matrix
3866 .  fact - the factor used to compute the hash table size
3867 
3868    Not Collective, each process can use a different factor
3869 
3870    Level: advanced
3871 
3872   Notes:
3873    This can also be set by the command line option: -mat_use_hash_table <fact>
3874 
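   Example:
   A sketch; the value 1.6 is illustrative (factors are expected to be larger than 1.0):

.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve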
3875 .keywords: matrix, hashtable, factor, HT
3876 
3877 .seealso: MatSetOption()
3878 @*/
3879 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3880 {
3881   PetscErrorCode ierr;
3882 
3883   PetscFunctionBegin;
3884   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3885   PetscFunctionReturn(0);
3886 }
3887 
3888 EXTERN_C_BEGIN
3889 #undef __FUNCT__
3890 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3891 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3892 {
3893   Mat_MPIBAIJ *baij;
3894 
3895   PetscFunctionBegin;
3896   baij = (Mat_MPIBAIJ*)mat->data;
3897   baij->ht_fact = fact;
3898   PetscFunctionReturn(0);
3899 }
3900 EXTERN_C_END
3901 
3902 #undef __FUNCT__
3903 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3904 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3905 {
3906   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3907   PetscFunctionBegin;
3908   *Ad     = a->A;
3909   *Ao     = a->B;
3910   *colmap = a->garray;
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 /*
3915     Special version for direct calls from Fortran (to eliminate two function call overheads)
3916 */
3917 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3918 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3919 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3920 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3921 #endif
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3925 /*@C
3926   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3927 
3928   Collective on Mat
3929 
3930   Input Parameters:
3931 + mat - the matrix
3932 . min - number of input rows
3933 . im - input rows
3934 . nin - number of input columns
3935 . in - input columns
3936 . v - numerical values input
3937 - addvin - INSERT_VALUES or ADD_VALUES
3938 
3939   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code duplication is unfortunate but avoids the function call overhead.
3940 
3941   Level: advanced
3942 
3943 .seealso:   MatSetValuesBlocked()
3944 @*/
3945 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3946 {
3947   /* convert input arguments to C version */
3948   Mat             mat = *matin;
3949   PetscInt        m = *min, n = *nin;
3950   InsertMode      addv = *addvin;
3951 
3952   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3953   const MatScalar *value;
3954   MatScalar       *barray=baij->barray;
3955   PetscBool       roworiented = baij->roworiented;
3956   PetscErrorCode  ierr;
3957   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3958   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3959   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3960 
3961   PetscFunctionBegin;
3962   /* tasks normally handled by MatSetValuesBlocked() */
3963   if (mat->insertmode == NOT_SET_VALUES) {
3964     mat->insertmode = addv;
3965   }
3966 #if defined(PETSC_USE_DEBUG)
3967   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3968   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3969 #endif
3970   if (mat->assembled) {
3971     mat->was_assembled = PETSC_TRUE;
3972     mat->assembled     = PETSC_FALSE;
3973   }
3974   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3975 
3976 
3977   if (!barray) {
3978     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3979     baij->barray = barray;
3980   }
3981 
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row oriented) or one block row (column
           oriented), v already holds each block contiguously and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

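        /* block columns inside [cstart,cend) belong to the diagonal part A; all other
           valid columns go to the off-diagonal part B, with the colmap translating the
           global block column into a local one */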
        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
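
/*
   Illustrative sketch (not part of the library source): the layout of v expected
   above is the same as for MatSetValuesBlocked(). With bs = 2, one block row
   (m = 1) and two block columns (n = 2), the row-oriented array v holds the
   2 x 4 dense values

      [1 2 3 4
       5 6 7 8]   stored as  {1,2,3,4,5,6,7,8}

   so the block for column in[0] is [1 2; 5 6] and the block for in[1] is
   [3 4; 7 8]. An equivalent call from C, with hypothetical indices, is

     PetscInt    im[1] = {0},in2[2] = {0,1};
     PetscScalar v[8]  = {1,2,3,4,5,6,7,8};
     ierr = MatSetValuesBlocked(mat,1,im,2,in2,v,INSERT_VALUES);CHKERRQ(ierr);
*/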

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain
         the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.   i - row indices
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the entries of i are offsets into the local
     j array (that is, i[row] gives the position in j where row "row" begins).

.keywords: matrix, baij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
          MPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
4106 @*/
4107 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4108 {
4109   PetscErrorCode ierr;
4110 
4111 
4112   PetscFunctionBegin;
4113   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4114   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4115   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4116   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4117   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4118   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4119   PetscFunctionReturn(0);
4120 }
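/*
   Illustrative usage sketch (hypothetical values, not part of the library
   source): each process supplies its local rows in CSR form with bs = 1, since
   the documentation above states only a block size of 1 is supported here.
   On a process owning two local rows of a matrix with four global columns:

     PetscInt    i[] = {0,2,3};           row offsets: row 0 has 2 entries, row 1 has 1
     PetscInt    j[] = {0,3,1};           global column indices
     PetscScalar a[] = {2.0,-1.0,5.0};    values, one per entry of j
     Mat         B;

     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&B);CHKERRQ(ierr);

   Because the routine copies i, j, and a into PETSc's internal format, the
   arrays may be freed or reused immediately after the call.
*/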
4121