xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5fd668637986a8d8518383a9159eebc368e1d5b4)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non scalable way since the
81    length of colmap equals the global matrix length.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++) {
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B = baij->B;
249               b = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba=b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray=baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) {
296     stepval = (n-1)*bs;
297   } else {
298     stepval = (m-1)*bs;
299   }
300   for (i=0; i<m; i++) {
301     if (im[i] < 0) continue;
302 #if defined(PETSC_USE_DEBUG)
303     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
304 #endif
305     if (im[i] >= rstart && im[i] < rend) {
306       row = im[i] - rstart;
307       for (j=0; j<n; j++) {
308         /* If NumCol = 1 then a copy is not required */
309         if ((roworiented) && (n == 1)) {
310           barray = (MatScalar*)v + i*bs2;
311         } else if ((!roworiented) && (m == 1)) {
312           barray = (MatScalar*)v + j*bs2;
313         } else { /* Here a copy is required */
314           if (roworiented) {
315             value = v + (i*(stepval+bs) + j)*bs;
316           } else {
317             value = v + (j*(stepval+bs) + i)*bs;
318           }
319           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
320             for (jj=0; jj<bs; jj++) {
321               barray[jj]  = value[jj];
322             }
323             barray += bs;
324           }
325           barray -= bs2;
326         }
327 
328         if (in[j] >= cstart && in[j] < cend) {
329           col  = in[j] - cstart;
330           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
331         }
332         else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined (PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined (PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408       row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up PetscInto the Hash Table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
427             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479   if (roworiented) {
480     stepval = (n-1)*bs;
481   } else {
482     stepval = (m-1)*bs;
483   }
484   for (i=0; i<m; i++) {
485 #if defined(PETSC_USE_DEBUG)
486     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
487     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
488 #endif
489     row   = im[i];
490     v_t   = v + i*nbs2;
491     if (row >= rstart && row < rend) {
492       for (j=0; j<n; j++) {
493         col = in[j];
494 
495         /* Look up into the Hash Table */
496         key = row*Nbs+col+1;
497         h1  = HASH(size,key,tmp);
498 
499         idx = h1;
500 #if defined(PETSC_USE_DEBUG)
501         total_ct++;
502         insert_ct++;
503        if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #else
511         if (HT[idx] != key) {
512           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
513           if (idx == size) {
514             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
515             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
516           }
517         }
518 #endif
519         baij_a = HD[idx];
520         if (roworiented) {
521           /*value = v + i*(stepval+bs)*bs + j*bs;*/
522           /* value = v + (i*(stepval+bs)+j)*bs; */
523           value = v_t;
524           v_t  += bs;
525           if (addv == ADD_VALUES) {
526             for (ii=0; ii<bs; ii++,value+=stepval) {
527               for (jj=ii; jj<bs2; jj+=bs) {
528                 baij_a[jj]  += *value++;
529               }
530             }
531           } else {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  = *value++;
535               }
536             }
537           }
538         } else {
539           value = v + j*(stepval+bs)*bs + i*bs;
540           if (addv == ADD_VALUES) {
541             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
542               for (jj=0; jj<bs; jj++) {
543                 baij_a[jj]  += *value++;
544               }
545             }
546           } else {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  = *value++;
550               }
551             }
552           }
553         }
554       }
555     } else {
556       if (!baij->donotstash) {
557         if (roworiented) {
558           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
559         } else {
560           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
561         }
562       }
563     }
564   }
565 #if defined(PETSC_USE_DEBUG)
566   baij->ht_total_ct = total_ct;
567   baij->ht_insert_ct = insert_ct;
568 #endif
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatGetValues_MPIBAIJ"
574 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
575 {
576   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
577   PetscErrorCode ierr;
578   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
579   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
580 
581   PetscFunctionBegin;
582   for (i=0; i<m; i++) {
583     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
584     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
585     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
586       row = idxm[i] - bsrstart;
587       for (j=0; j<n; j++) {
588         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
589         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
590         if (idxn[j] >= bscstart && idxn[j] < bscend) {
591           col = idxn[j] - bscstart;
592           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
593         } else {
594           if (!baij->colmap) {
595             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
596           }
597 #if defined (PETSC_USE_CTABLE)
598           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
599           data --;
600 #else
601           data = baij->colmap[idxn[j]/bs]-1;
602 #endif
603           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
604           else {
605             col  = data + idxn[j]%bs;
606             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
607           }
608         }
609       }
610     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
611   }
612  PetscFunctionReturn(0);
613 }
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "MatNorm_MPIBAIJ"
617 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
618 {
619   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
620   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
621   PetscErrorCode ierr;
622   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
623   PetscReal      sum = 0.0;
624   MatScalar      *v;
625 
626   PetscFunctionBegin;
627   if (baij->size == 1) {
628     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
629   } else {
630     if (type == NORM_FROBENIUS) {
631       v = amat->a;
632       nz = amat->nz*bs2;
633       for (i=0; i<nz; i++) {
634         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
635       }
636       v = bmat->a;
637       nz = bmat->nz*bs2;
638       for (i=0; i<nz; i++) {
639         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
640       }
641       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
642       *nrm = PetscSqrtReal(*nrm);
643     } else if (type == NORM_1) { /* max column sum */
644       PetscReal *tmp,*tmp2;
645       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
646       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
647       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
648       v = amat->a; jj = amat->j;
649       for (i=0; i<amat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*(cstart + *jj) + j; /* column index */
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v);  v++;
654           }
655         }
656         jj++;
657       }
658       v = bmat->a; jj = bmat->j;
659       for (i=0; i<bmat->nz; i++) {
660         for (j=0; j<bs; j++) {
661           col = bs*garray[*jj] + j;
662           for (row=0; row<bs; row++) {
663             tmp[col] += PetscAbsScalar(*v); v++;
664           }
665         }
666         jj++;
667       }
668       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
669       *nrm = 0.0;
670       for (j=0; j<mat->cmap->N; j++) {
671         if (tmp2[j] > *nrm) *nrm = tmp2[j];
672       }
673       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
674     } else if (type == NORM_INFINITY) { /* max row sum */
675       PetscReal *sums;
676       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
677       sum = 0.0;
678       for (j=0; j<amat->mbs; j++) {
679         for (row=0; row<bs; row++) sums[row] = 0.0;
680         v = amat->a + bs2*amat->i[j];
681         nz = amat->i[j+1]-amat->i[j];
682         for (i=0; i<nz; i++) {
683           for (col=0; col<bs; col++) {
684             for (row=0; row<bs; row++) {
685               sums[row] += PetscAbsScalar(*v); v++;
686             }
687           }
688         }
689         v = bmat->a + bs2*bmat->i[j];
690         nz = bmat->i[j+1]-bmat->i[j];
691         for (i=0; i<nz; i++) {
692           for (col=0; col<bs; col++) {
693             for (row=0; row<bs; row++) {
694               sums[row] += PetscAbsScalar(*v); v++;
695             }
696           }
697         }
698         for (row=0; row<bs; row++) {
699           if (sums[row] > sum) sum = sums[row];
700         }
701       }
702       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
703       ierr = PetscFree(sums);CHKERRQ(ierr);
704     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
705   }
706   PetscFunctionReturn(0);
707 }
708 
709 /*
710   Creates the hash table, and sets the table
711   This table is created only once.
712   If new entried need to be added to the matrix
713   then the hash table has to be destroyed and
714   recreated.
715 */
716 #undef __FUNCT__
717 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
718 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
719 {
720   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
721   Mat            A = baij->A,B=baij->B;
722   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
723   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
724   PetscErrorCode ierr;
725   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
726   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
727   PetscInt       *HT,key;
728   MatScalar      **HD;
729   PetscReal      tmp;
730 #if defined(PETSC_USE_INFO)
731   PetscInt       ct=0,max=0;
732 #endif
733 
734   PetscFunctionBegin;
735   if (baij->ht) PetscFunctionReturn(0);
736 
737   baij->ht_size = (PetscInt)(factor*nz);
738   ht_size       = baij->ht_size;
739 
740   /* Allocate Memory for Hash Table */
741   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
742   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
743   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
744   HD   = baij->hd;
745   HT   = baij->ht;
746 
747   /* Loop Over A */
748   for (i=0; i<a->mbs; i++) {
749     for (j=ai[i]; j<ai[i+1]; j++) {
750       row = i+rstart;
751       col = aj[j]+cstart;
752 
753       key = row*Nbs + col + 1;
754       h1  = HASH(ht_size,key,tmp);
755       for (k=0; k<ht_size; k++) {
756         if (!HT[(h1+k)%ht_size]) {
757           HT[(h1+k)%ht_size] = key;
758           HD[(h1+k)%ht_size] = a->a + j*bs2;
759           break;
760 #if defined(PETSC_USE_INFO)
761         } else {
762           ct++;
763 #endif
764         }
765       }
766 #if defined(PETSC_USE_INFO)
767       if (k> max) max = k;
768 #endif
769     }
770   }
771   /* Loop Over B */
772   for (i=0; i<b->mbs; i++) {
773     for (j=bi[i]; j<bi[i+1]; j++) {
774       row = i+rstart;
775       col = garray[bj[j]];
776       key = row*Nbs + col + 1;
777       h1  = HASH(ht_size,key,tmp);
778       for (k=0; k<ht_size; k++) {
779         if (!HT[(h1+k)%ht_size]) {
780           HT[(h1+k)%ht_size] = key;
781           HD[(h1+k)%ht_size] = b->a + j*bs2;
782           break;
783 #if defined(PETSC_USE_INFO)
784         } else {
785           ct++;
786 #endif
787         }
788       }
789 #if defined(PETSC_USE_INFO)
790       if (k> max) max = k;
791 #endif
792     }
793   }
794 
795   /* Print Summary */
796 #if defined(PETSC_USE_INFO)
797   for (i=0,j=0; i<ht_size; i++) {
798     if (HT[i]) {j++;}
799   }
800   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
801 #endif
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
807 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
808 {
809   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
810   PetscErrorCode ierr;
811   PetscInt       nstash,reallocs;
812   InsertMode     addv;
813 
814   PetscFunctionBegin;
815   if (baij->donotstash || mat->nooffprocentries) {
816     PetscFunctionReturn(0);
817   }
818 
819   /* make sure all processors are either in INSERTMODE or ADDMODE */
820   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
821   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
822   mat->insertmode = addv; /* in case this processor had no cache */
823 
824   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
825   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
826   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
827   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
828   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
829   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
835 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
836 {
837   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
838   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
839   PetscErrorCode ierr;
840   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
841   PetscInt       *row,*col;
842   PetscBool      r1,r2,r3,other_disassembled;
843   MatScalar      *val;
844   InsertMode     addv = mat->insertmode;
845   PetscMPIInt    n;
846 
847   PetscFunctionBegin;
848   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
849   if (!baij->donotstash && !mat->nooffprocentries) {
850     while (1) {
851       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
852       if (!flg) break;
853 
854       for (i=0; i<n;) {
855         /* Now identify the consecutive vals belonging to the same row */
856         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
857         if (j < n) ncols = j-i;
858         else       ncols = n-i;
859         /* Now assemble all these values with a single function call */
860         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
861         i = j;
862       }
863     }
864     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
865     /* Now process the block-stash. Since the values are stashed column-oriented,
866        set the roworiented flag to column oriented, and after MatSetValues()
867        restore the original flags */
868     r1 = baij->roworiented;
869     r2 = a->roworiented;
870     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
871     baij->roworiented = PETSC_FALSE;
872     a->roworiented    = PETSC_FALSE;
873     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
874     while (1) {
875       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
876       if (!flg) break;
877 
878       for (i=0; i<n;) {
879         /* Now identify the consecutive vals belonging to the same row */
880         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
881         if (j < n) ncols = j-i;
882         else       ncols = n-i;
883         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
884         i = j;
885       }
886     }
887     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
888     baij->roworiented = r1;
889     a->roworiented    = r2;
890     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
891   }
892 
893   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
894   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
895 
896   /* determine if any processor has disassembled, if so we must
897      also disassemble ourselfs, in order that we may reassemble. */
898   /*
899      if nonzero structure of submatrix B cannot change then we know that
900      no processor disassembled thus we can skip this stuff
901   */
902   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
903     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
904     if (mat->was_assembled && !other_disassembled) {
905       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
906     }
907   }
908 
909   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
910     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
911   }
912   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
913   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
914   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
915 
916 #if defined(PETSC_USE_INFO)
917   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
918     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
919     baij->ht_total_ct  = 0;
920     baij->ht_insert_ct = 0;
921   }
922 #endif
923   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
924     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
925     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
926     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
927   }
928 
929   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
930   baij->rowvalues = 0;
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
936 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
937 {
938   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
939   PetscErrorCode    ierr;
940   PetscMPIInt       size = baij->size,rank = baij->rank;
941   PetscInt          bs = mat->rmap->bs;
942   PetscBool         iascii,isdraw;
943   PetscViewer       sviewer;
944   PetscViewerFormat format;
945 
946   PetscFunctionBegin;
947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
948   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
949   if (iascii) {
950     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
951     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
952       MatInfo info;
953       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
954       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
955       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
956       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
957              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
958       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
960       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
962       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
964       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
965       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
966       PetscFunctionReturn(0);
967     } else if (format == PETSC_VIEWER_ASCII_INFO) {
968       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
969       PetscFunctionReturn(0);
970     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
971       PetscFunctionReturn(0);
972     }
973   }
974 
975   if (isdraw) {
976     PetscDraw       draw;
977     PetscBool  isnull;
978     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
979     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
980   }
981 
982   if (size == 1) {
983     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
984     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
985   } else {
986     /* assemble the entire matrix onto first processor. */
987     Mat         A;
988     Mat_SeqBAIJ *Aloc;
989     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
990     MatScalar   *a;
991 
992     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
993     /* Perhaps this should be the type of mat? */
994     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
995     if (!rank) {
996       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
997     } else {
998       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
999     }
1000     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1001     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1002     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1003     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1004 
1005     /* copy over the A part */
1006     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1007     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1008     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1009 
1010     for (i=0; i<mbs; i++) {
1011       rvals[0] = bs*(baij->rstartbs + i);
1012       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1013       for (j=ai[i]; j<ai[i+1]; j++) {
1014         col = (baij->cstartbs+aj[j])*bs;
1015         for (k=0; k<bs; k++) {
1016           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1017           col++; a += bs;
1018         }
1019       }
1020     }
1021     /* copy over the B part */
1022     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1023     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     for (i=0; i<mbs; i++) {
1025       rvals[0] = bs*(baij->rstartbs + i);
1026       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1027       for (j=ai[i]; j<ai[i+1]; j++) {
1028         col = baij->garray[aj[j]]*bs;
1029         for (k=0; k<bs; k++) {
1030           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1031           col++; a += bs;
1032         }
1033       }
1034     }
1035     ierr = PetscFree(rvals);CHKERRQ(ierr);
1036     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     /*
1039        Everyone has to call to draw the matrix since the graphics waits are
1040        synchronized across all processors that share the PetscDraw object
1041     */
1042     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1043     if (!rank) {
1044       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1045     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1046       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1047       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1048     }
1049     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1050     ierr = MatDestroy(&A);CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1057 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1058 {
1059   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1060   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1061   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1062   PetscErrorCode ierr;
1063   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1064   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1065   int            fd;
1066   PetscScalar    *column_values;
1067   FILE           *file;
1068   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1069   PetscInt       message_count,flowcontrolcount;
1070 
1071   PetscFunctionBegin;
1072   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1074   nz   = bs2*(A->nz + B->nz);
1075   rlen = mat->rmap->n;
1076   if (!rank) {
1077     header[0] = MAT_FILE_CLASSID;
1078     header[1] = mat->rmap->N;
1079     header[2] = mat->cmap->N;
1080     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1081     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1082     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1083     /* get largest number of rows any processor has */
1084     range = mat->rmap->range;
1085     for (i=1; i<size; i++) {
1086       rlen = PetscMax(rlen,range[i+1] - range[i]);
1087     }
1088   } else {
1089     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1090   }
1091 
1092   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1093   /* compute lengths of each row  */
1094   for (i=0; i<a->mbs; i++) {
1095     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1096   }
1097   /* store the row lengths to the file */
1098   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1099   if (!rank) {
1100     MPI_Status status;
1101     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1102     rlen  = (range[1] - range[0])/bs;
1103     for (i=0; i<rlen; i++) {
1104       for (j=0; j<bs; j++) {
1105         row_lens[i*bs+j] = bs*crow_lens[i];
1106       }
1107     }
1108     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1109     for (i=1; i<size; i++) {
1110       rlen = (range[i+1] - range[i])/bs;
1111       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1112       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1113       for (k=0; k<rlen; k++) {
1114         for (j=0; j<bs; j++) {
1115           row_lens[k*bs+j] = bs*crow_lens[k];
1116         }
1117       }
1118       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1119     }
1120     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1121     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1122   } else {
1123     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1124     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1125     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1126   }
1127   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1128 
1129   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1130      information needed to make it for each row from a block row. This does require more communication but still not more than
1131      the communication needed for the nonzero values  */
1132   nzmax = nz; /*  space a largest processor needs */
1133   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1134   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1135   cnt  = 0;
1136   for (i=0; i<a->mbs; i++) {
1137     pcnt = cnt;
1138     for (j=B->i[i]; j<B->i[i+1]; j++) {
1139       if ((col = garray[B->j[j]]) > cstart) break;
1140       for (l=0; l<bs; l++) {
1141         column_indices[cnt++] = bs*col+l;
1142       }
1143     }
1144     for (k=A->i[i]; k<A->i[i+1]; k++) {
1145       for (l=0; l<bs; l++) {
1146         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1147       }
1148     }
1149     for (; j<B->i[i+1]; j++) {
1150       for (l=0; l<bs; l++) {
1151         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1152       }
1153     }
1154     len = cnt - pcnt;
1155     for (k=1; k<bs; k++) {
1156       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1157       cnt += len;
1158     }
1159   }
1160   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1161 
1162   /* store the columns to the file */
1163   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1164   if (!rank) {
1165     MPI_Status status;
1166     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1167     for (i=1; i<size; i++) {
1168       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1169       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1170       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1171       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     }
1173     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1174   } else {
1175     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1176     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1177     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1178     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1179   }
1180   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1181 
1182   /* load up the numerical values */
1183   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1184   cnt = 0;
1185   for (i=0; i<a->mbs; i++) {
1186     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1187     for (j=B->i[i]; j<B->i[i+1]; j++) {
1188       if (garray[B->j[j]] > cstart) break;
1189       for (l=0; l<bs; l++) {
1190         for (ll=0; ll<bs; ll++) {
1191           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1192         }
1193       }
1194       cnt += bs;
1195     }
1196     for (k=A->i[i]; k<A->i[i+1]; k++) {
1197       for (l=0; l<bs; l++) {
1198         for (ll=0; ll<bs; ll++) {
1199           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1200         }
1201       }
1202       cnt += bs;
1203     }
1204     for (; j<B->i[i+1]; j++) {
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     cnt += (bs-1)*rlen;
1213   }
1214   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1215 
1216   /* store the column values to the file */
1217   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1218   if (!rank) {
1219     MPI_Status status;
1220     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1221     for (i=1; i<size; i++) {
1222       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1223       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1224       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1231     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_values);CHKERRQ(ierr);
1235 
1236   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1237   if (file) {
1238     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_MPIBAIJ"
1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscBool      iascii,isdraw,issocket,isbinary;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1255   if (iascii || isdraw || issocket) {
1256     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266 {
1267   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271 #if defined(PETSC_USE_LOG)
1272   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273 #endif
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1278 #if defined (PETSC_USE_CTABLE)
1279   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1285   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* values actually were received in the Begin() but we need to call this nop */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted in yy until the next line, which is true for my implementation*/
1387   /* but is not perhaps always true. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt     max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) { max = tmp; }
1447     }
1448     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap  = mat->garray;
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart)
1483               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1484             else break;
1485           }
1486           imark = i;
1487         }
1488         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1490       }
1491     } else {
1492       if (idx) *idx = 0;
1493       if (v)   *v   = 0;
1494     }
1495   }
1496   *nz = nztot;
1497   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size     = (PetscReal)matin->rmap->bs;
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1542   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1543   isend[3] += info->memory;  isend[4] += info->mallocs;
1544   if (flag == MAT_LOCAL) {
1545     info->nz_used      = isend[0];
1546     info->nz_allocated = isend[1];
1547     info->nz_unneeded  = isend[2];
1548     info->memory       = isend[3];
1549     info->mallocs      = isend[4];
1550   } else if (flag == MAT_GLOBAL_MAX) {
1551     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1552     info->nz_used      = irecv[0];
1553     info->nz_allocated = irecv[1];
1554     info->nz_unneeded  = irecv[2];
1555     info->memory       = irecv[3];
1556     info->mallocs      = irecv[4];
1557   } else if (flag == MAT_GLOBAL_SUM) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1566   info->fill_ratio_needed = 0;
1567   info->factor_mallocs    = 0;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1574 {
1575   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   switch (op) {
1580   case MAT_NEW_NONZERO_LOCATIONS:
1581   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1582   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583   case MAT_KEEP_NONZERO_PATTERN:
1584   case MAT_NEW_NONZERO_LOCATION_ERR:
1585     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1586     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1587     break;
1588   case MAT_ROW_ORIENTED:
1589     a->roworiented = flg;
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_NEW_DIAGONALS:
1594     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1595     break;
1596   case MAT_IGNORE_OFF_PROC_ENTRIES:
1597     a->donotstash = flg;
1598     break;
1599   case MAT_USE_HASH_TABLE:
1600     a->ht_flag = flg;
1601     break;
1602   case MAT_SYMMETRIC:
1603   case MAT_STRUCTURALLY_SYMMETRIC:
1604   case MAT_HERMITIAN:
1605   case MAT_SYMMETRY_ETERNAL:
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     break;
1608   default:
1609     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1617 {
1618   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1619   Mat_SeqBAIJ    *Aloc;
1620   Mat            B;
1621   PetscErrorCode ierr;
1622   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1623   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1624   MatScalar      *a;
1625 
1626   PetscFunctionBegin;
1627   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1628   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1629     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1630     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1631     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1632     /* Do not know preallocation information, but must set block size */
1633     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1634   } else {
1635     B = *matout;
1636   }
1637 
1638   /* copy over the A part */
1639   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1640   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1641   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1642 
1643   for (i=0; i<mbs; i++) {
1644     rvals[0] = bs*(baij->rstartbs + i);
1645     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1646     for (j=ai[i]; j<ai[i+1]; j++) {
1647       col = (baij->cstartbs+aj[j])*bs;
1648       for (k=0; k<bs; k++) {
1649         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1650         col++; a += bs;
1651       }
1652     }
1653   }
1654   /* copy over the B part */
1655   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1656   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1657   for (i=0; i<mbs; i++) {
1658     rvals[0] = bs*(baij->rstartbs + i);
1659     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1660     for (j=ai[i]; j<ai[i+1]; j++) {
1661       col = baij->garray[aj[j]]*bs;
1662       for (k=0; k<bs; k++) {
1663         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   ierr = PetscFree(rvals);CHKERRQ(ierr);
1669   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671 
1672   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1673     *matout = B;
1674   } else {
1675     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1682 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1683 {
1684   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1685   Mat            a = baij->A,b = baij->B;
1686   PetscErrorCode ierr;
1687   PetscInt       s1,s2,s3;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1691   if (rr) {
1692     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1693     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1694     /* Overlap communication with computation. */
1695     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696   }
1697   if (ll) {
1698     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1699     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1700     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1701   }
1702   /* scale  the diagonal block */
1703   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1704 
1705   if (rr) {
1706     /* Do a scatter end and then right scale the off-diagonal block */
1707     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1709   }
1710 
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1716 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1717 {
1718   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1719   PetscErrorCode    ierr;
1720   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1721   PetscInt          i,*owners = A->rmap->range;
1722   PetscInt          *nprocs,j,idx,nsends,row;
1723   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1724   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1725   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1726   MPI_Comm          comm = ((PetscObject)A)->comm;
1727   MPI_Request       *send_waits,*recv_waits;
1728   MPI_Status        recv_status,*send_status;
1729   const PetscScalar *xx;
1730   PetscScalar       *bb;
1731 #if defined(PETSC_DEBUG)
1732   PetscBool         found = PETSC_FALSE;
1733 #endif
1734 
1735   PetscFunctionBegin;
1736   /*  first count number of contributors to each processor */
1737   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1738   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1739   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1740   j = 0;
1741   for (i=0; i<N; i++) {
1742     if (lastidx > (idx = rows[i])) j = 0;
1743     lastidx = idx;
1744     for (; j<size; j++) {
1745       if (idx >= owners[j] && idx < owners[j+1]) {
1746         nprocs[2*j]++;
1747         nprocs[2*j+1] = 1;
1748         owner[i] = j;
1749 #if defined(PETSC_DEBUG)
1750         found = PETSC_TRUE;
1751 #endif
1752         break;
1753       }
1754     }
1755 #if defined(PETSC_DEBUG)
1756     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1757     found = PETSC_FALSE;
1758 #endif
1759   }
1760   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1761 
1762   if (A->nooffproczerorows) {
1763     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1764     nrecvs = nsends;
1765     nmax   = N;
1766   } else {
1767     /* inform other processors of number of messages and max length*/
1768     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1769   }
1770 
1771   /* post receives:   */
1772   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1773   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1774   for (i=0; i<nrecvs; i++) {
1775     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1776   }
1777 
1778   /* do sends:
1779      1) starts[i] gives the starting index in svalues for stuff going to
1780      the ith processor
1781   */
1782   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1783   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1784   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1785   starts[0]  = 0;
1786   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1787   for (i=0; i<N; i++) {
1788     svalues[starts[owner[i]]++] = rows[i];
1789   }
1790 
1791   starts[0] = 0;
1792   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1793   count = 0;
1794   for (i=0; i<size; i++) {
1795     if (nprocs[2*i+1]) {
1796       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1797     }
1798   }
1799   ierr = PetscFree(starts);CHKERRQ(ierr);
1800 
1801   base = owners[rank];
1802 
1803   /*  wait on receives */
1804   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1805   count  = nrecvs;
1806   slen = 0;
1807   while (count) {
1808     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1809     /* unpack receives into our local space */
1810     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1811     source[imdex]  = recv_status.MPI_SOURCE;
1812     lens[imdex]    = n;
1813     slen          += n;
1814     count--;
1815   }
1816   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1817 
1818   /* move the data into the send scatter */
1819   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1820   count = 0;
1821   for (i=0; i<nrecvs; i++) {
1822     values = rvalues + i*nmax;
1823     for (j=0; j<lens[i]; j++) {
1824       lrows[count++] = values[j] - base;
1825     }
1826   }
1827   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1828   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1829   ierr = PetscFree(owner);CHKERRQ(ierr);
1830   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1831 
1832   /* fix right hand side if needed */
1833   if (x && b) {
1834     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1835     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1836     for (i=0; i<slen; i++) {
1837       bb[lrows[i]] = diag*xx[lrows[i]];
1838     }
1839     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1840     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1841   }
1842 
1843   /* actually zap the local rows */
1844   /*
1845         Zero the required rows. If the "diagonal block" of the matrix
1846      is square and the user wishes to set the diagonal we use separate
1847      code so that MatSetValues() is not called for each diagonal allocating
1848      new memory, thus calling lots of mallocs and slowing things down.
1849 
1850   */
1851   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1852   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1853   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1854     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1855   } else if (diag != 0.0) {
1856     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1857     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1858        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1859     for (i=0; i<slen; i++) {
1860       row  = lrows[i] + rstart_bs;
1861       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1862     }
1863     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1864     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1865   } else {
1866     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1867   }
1868 
1869   ierr = PetscFree(lrows);CHKERRQ(ierr);
1870 
1871   /* wait on sends */
1872   if (nsends) {
1873     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1874     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1875     ierr = PetscFree(send_status);CHKERRQ(ierr);
1876   }
1877   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1878   ierr = PetscFree(svalues);CHKERRQ(ierr);
1879 
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1885 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1886 {
1887   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatEqual_MPIBAIJ"
1899 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1900 {
1901   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1902   Mat            a,b,c,d;
1903   PetscBool      flg;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   a = matA->A; b = matA->B;
1908   c = matB->A; d = matB->B;
1909 
1910   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1911   if (flg) {
1912     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1913   }
1914   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCopy_MPIBAIJ"
1920 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1921 {
1922   PetscErrorCode ierr;
1923   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1924   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1925 
1926   PetscFunctionBegin;
1927   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1928   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1929     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1930   } else {
1931     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1932     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1933   }
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1939 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1940 {
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1950 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1951 {
1952   PetscErrorCode ierr;
1953   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1954   PetscBLASInt   bnz,one=1;
1955   Mat_SeqBAIJ    *x,*y;
1956 
1957   PetscFunctionBegin;
1958   if (str == SAME_NONZERO_PATTERN) {
1959     PetscScalar alpha = a;
1960     x    = (Mat_SeqBAIJ *)xx->A->data;
1961     y    = (Mat_SeqBAIJ *)yy->A->data;
1962     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1963     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1964     x    = (Mat_SeqBAIJ *)xx->B->data;
1965     y    = (Mat_SeqBAIJ *)yy->B->data;
1966     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1967     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1968   } else {
1969     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1970   }
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1976 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1977 {
1978   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1979   PetscErrorCode ierr;
1980 
1981   PetscFunctionBegin;
1982   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1983   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1989 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1990 {
1991   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1996   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2002 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2003 {
2004   PetscErrorCode ierr;
2005   IS             iscol_local;
2006   PetscInt       csize;
2007 
2008   PetscFunctionBegin;
2009   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2010   if (call == MAT_REUSE_MATRIX) {
2011     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2012     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2013   } else {
2014     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2015   }
2016   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2017   if (call == MAT_INITIAL_MATRIX) {
2018     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2019     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2020   }
2021   PetscFunctionReturn(0);
2022 }
2023 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2026 /*
2027   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2028   in local and then by concatenating the local matrices the end result.
2029   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2030 */
2031 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2032 {
2033   PetscErrorCode ierr;
2034   PetscMPIInt    rank,size;
2035   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2036   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2037   Mat            M,Mreuse;
2038   MatScalar      *vwork,*aa;
2039   MPI_Comm       comm = ((PetscObject)mat)->comm;
2040   IS             isrow_new, iscol_new;
2041   PetscBool      idflag,allrows, allcols;
2042   Mat_SeqBAIJ    *aij;
2043 
2044   PetscFunctionBegin;
2045   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2046   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2047   /* The compression and expansion should be avoided. Doesn't point
2048      out errors, might change the indices, hence buggey */
2049   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2050   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2051 
2052   /* Check for special case: each processor gets entire matrix columns */
2053   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2054   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2055   if (idflag && ncol == mat->cmap->N) {
2056     allcols = PETSC_TRUE;
2057   } else {
2058     allcols = PETSC_FALSE;
2059   }
2060 
2061   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2062   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2063   if (idflag && nrow == mat->rmap->N) {
2064     allrows = PETSC_TRUE;
2065   } else {
2066     allrows = PETSC_FALSE;
2067   }
2068   if (call ==  MAT_REUSE_MATRIX) {
2069     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2070     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2071     ierr  = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2072   } else {
2073     ierr   = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2074   }
2075   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2076   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2077   /*
2078       m - number of local rows
2079       n - number of columns (same on all processors)
2080       rstart - first row in new global matrix generated
2081   */
2082   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2083   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2084   m    = m/bs;
2085   n    = n/bs;
2086 
2087   if (call == MAT_INITIAL_MATRIX) {
2088     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2089     ii  = aij->i;
2090     jj  = aij->j;
2091 
2092     /*
2093         Determine the number of non-zeros in the diagonal and off-diagonal
2094         portions of the matrix in order to do correct preallocation
2095     */
2096 
2097     /* first get start and end of "diagonal" columns */
2098     if (csize == PETSC_DECIDE) {
2099       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2100       if (mglobal == n*bs) { /* square matrix */
2101         nlocal = m;
2102       } else {
2103         nlocal = n/size + ((n % size) > rank);
2104       }
2105     } else {
2106       nlocal = csize/bs;
2107     }
2108     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2109     rstart = rend - nlocal;
2110     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2111 
2112     /* next, compute all the lengths */
2113     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2114     olens = dlens + m;
2115     for (i=0; i<m; i++) {
2116       jend = ii[i+1] - ii[i];
2117       olen = 0;
2118       dlen = 0;
2119       for (j=0; j<jend; j++) {
2120         if (*jj < rstart || *jj >= rend) olen++;
2121         else dlen++;
2122         jj++;
2123       }
2124       olens[i] = olen;
2125       dlens[i] = dlen;
2126     }
2127     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2128     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2129     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2130     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2131     ierr = PetscFree(dlens);CHKERRQ(ierr);
2132   } else {
2133     PetscInt ml,nl;
2134 
2135     M = *newmat;
2136     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2137     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2138     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2139     /*
2140          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2141        rather than the slower MatSetValues().
2142     */
2143     M->was_assembled = PETSC_TRUE;
2144     M->assembled     = PETSC_FALSE;
2145   }
2146   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2147   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2148   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2149   ii  = aij->i;
2150   jj  = aij->j;
2151   aa  = aij->a;
2152   for (i=0; i<m; i++) {
2153     row   = rstart/bs + i;
2154     nz    = ii[i+1] - ii[i];
2155     cwork = jj;     jj += nz;
2156     vwork = aa;     aa += nz*bs*bs;
2157     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2158   }
2159 
2160   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   *newmat = M;
2163 
2164   /* save submatrix used in processor for next request */
2165   if (call ==  MAT_INITIAL_MATRIX) {
2166     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2167     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2168   }
2169 
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatPermute_MPIBAIJ"
2175 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2176 {
2177   MPI_Comm       comm,pcomm;
2178   PetscInt       first,rlocal_size,clocal_size,nrows;
2179   const PetscInt *rows;
2180   PetscMPIInt    size;
2181   IS             crowp,growp,irowp,lrowp,lcolp;
2182   PetscErrorCode ierr;
2183 
2184   PetscFunctionBegin;
2185   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2186   /* make a collective version of 'rowp' */
2187   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2188   if (pcomm==comm) {
2189     crowp = rowp;
2190   } else {
2191     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2192     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2193     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2194     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2195   }
2196   /* collect the global row permutation and invert it */
2197   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2198   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2199   if (pcomm!=comm) {
2200     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2201   }
2202   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2203   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2204   /* get the local target indices */
2205   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2206   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2207   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2208   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2209   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2210   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2211   /* the column permutation is so much easier;
2212      make a local version of 'colp' and invert it */
2213   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2214   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2215   if (size==1) {
2216     lcolp = colp;
2217   } else {
2218     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2219   }
2220   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2221   /* now we just get the submatrix */
2222   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2223   if (size>1) {
2224     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2225   }
2226   /* clean up */
2227   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2233 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2234 {
2235   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2236   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2237 
2238   PetscFunctionBegin;
2239   if (nghosts) { *nghosts = B->nbs;}
2240   if (ghosts) {*ghosts = baij->garray;}
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2248 /*
2249     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2250 */
2251 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2252 {
2253   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2254   PetscErrorCode        ierr;
2255   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2256   PetscInt              bs,i,n,nrows,j,k,m,ncols,col;
2257   const PetscInt        *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
2258   PetscInt              nis = iscoloring->n,nctot,*cols;
2259   PetscInt              *rowhit,M,cstart,cend,colb;
2260   PetscInt              *columnsforrow,l;
2261   IS                    *isa;
2262   PetscBool              done,flg;
2263   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2264   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2265 
2266   PetscFunctionBegin;
2267   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2268   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2269 
2270   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2271   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2272   M                = mat->rmap->n/bs;
2273   cstart           = mat->cmap->rstart/bs;
2274   cend             = mat->cmap->rend/bs;
2275   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2276   c->N             = mat->cmap->N/bs;
2277   c->m             = mat->rmap->n/bs;
2278   c->rstart        = mat->rmap->rstart/bs;
2279 
2280   c->ncolors       = nis;
2281   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2282   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2283   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2284   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2285   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2286   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2287 
2288   /* Allow access to data structures of local part of matrix */
2289   if (!baij->colmap) {
2290     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2291   }
2292   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2293   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2294 
2295   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2296   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2297 
2298   for (i=0; i<nis; i++) {
2299     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2300     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2301     c->ncolumns[i] = n;
2302     if (n) {
2303       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2304       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2305       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2306     } else {
2307       c->columns[i]  = 0;
2308     }
2309 
2310     if (ctype == IS_COLORING_GLOBAL) {
2311       /* Determine the total (parallel) number of columns of this color */
2312       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2313       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2314 
2315       nn   = PetscMPIIntCast(n);
2316       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2317       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2318       if (!nctot) {
2319         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2320       }
2321 
2322       disp[0] = 0;
2323       for (j=1; j<size; j++) {
2324         disp[j] = disp[j-1] + ncolsonproc[j-1];
2325       }
2326 
2327       /* Get complete list of columns for color on each processor */
2328       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2329       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2330       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2331     } else if (ctype == IS_COLORING_GHOSTED) {
2332       /* Determine local number of columns of this color on this process, including ghost points */
2333       nctot = n;
2334       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2335       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2336     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2337 
2338     /*
2339        Mark all rows affect by these columns
2340     */
2341     /* Temporary option to allow for debugging/testing */
2342     flg  = PETSC_FALSE;
2343     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2344     if (!flg) {/*-----------------------------------------------------------------------------*/
2345       /* crude, fast version */
2346       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2347       /* loop over columns*/
2348       for (j=0; j<nctot; j++) {
2349         if (ctype == IS_COLORING_GHOSTED) {
2350           col = ltog[cols[j]];
2351         } else {
2352           col  = cols[j];
2353         }
2354         if (col >= cstart && col < cend) {
2355           /* column is in diagonal block of matrix */
2356           rows = A_cj + A_ci[col-cstart];
2357           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2358         } else {
2359 #if defined (PETSC_USE_CTABLE)
2360           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2361           colb --;
2362 #else
2363           colb = baij->colmap[col] - 1;
2364 #endif
2365           if (colb == -1) {
2366             m = 0;
2367           } else {
2368             colb = colb/bs;
2369             rows = B_cj + B_ci[colb];
2370             m    = B_ci[colb+1] - B_ci[colb];
2371           }
2372         }
2373         /* loop over columns marking them in rowhit */
2374         for (k=0; k<m; k++) {
2375           rowhit[*rows++] = col + 1;
2376         }
2377       }
2378 
2379       /* count the number of hits */
2380       nrows = 0;
2381       for (j=0; j<M; j++) {
2382         if (rowhit[j]) nrows++;
2383       }
2384       c->nrows[i]         = nrows;
2385       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2386       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2387       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2388       nrows = 0;
2389       for (j=0; j<M; j++) {
2390         if (rowhit[j]) {
2391           c->rows[i][nrows]           = j;
2392           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2393           nrows++;
2394         }
2395       }
2396     } else {/*-------------------------------------------------------------------------------*/
2397       /* slow version, using rowhit as a linked list */
2398       PetscInt currentcol,fm,mfm;
2399       rowhit[M] = M;
2400       nrows     = 0;
2401       /* loop over columns*/
2402       for (j=0; j<nctot; j++) {
2403         if (ctype == IS_COLORING_GHOSTED) {
2404           col = ltog[cols[j]];
2405         } else {
2406           col  = cols[j];
2407         }
2408         if (col >= cstart && col < cend) {
2409           /* column is in diagonal block of matrix */
2410           rows = A_cj + A_ci[col-cstart];
2411           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2412         } else {
2413 #if defined (PETSC_USE_CTABLE)
2414           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2415           colb --;
2416 #else
2417           colb = baij->colmap[col] - 1;
2418 #endif
2419           if (colb == -1) {
2420             m = 0;
2421           } else {
2422             colb = colb/bs;
2423             rows = B_cj + B_ci[colb];
2424             m    = B_ci[colb+1] - B_ci[colb];
2425           }
2426         }
2427 
2428         /* loop over columns marking them in rowhit */
2429         fm    = M; /* fm points to first entry in linked list */
2430         for (k=0; k<m; k++) {
2431           currentcol = *rows++;
2432           /* is it already in the list? */
2433           do {
2434             mfm  = fm;
2435             fm   = rowhit[fm];
2436           } while (fm < currentcol);
2437           /* not in list so add it */
2438           if (fm != currentcol) {
2439             nrows++;
2440             columnsforrow[currentcol] = col;
2441             /* next three lines insert new entry into linked list */
2442             rowhit[mfm]               = currentcol;
2443             rowhit[currentcol]        = fm;
2444             fm                        = currentcol;
2445             /* fm points to present position in list since we know the columns are sorted */
2446           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2447         }
2448       }
2449       c->nrows[i]         = nrows;
2450       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2451       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2452       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2453       /* now store the linked list of rows into c->rows[i] */
2454       nrows = 0;
2455       fm    = rowhit[M];
2456       do {
2457         c->rows[i][nrows]            = fm;
2458         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2459         fm                           = rowhit[fm];
2460       } while (fm < M);
2461     } /* ---------------------------------------------------------------------------------------*/
2462     ierr = PetscFree(cols);CHKERRQ(ierr);
2463   }
2464 
2465   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2466   /*
2467        vscale will contain the "diagonal" on processor scalings followed by the off processor
2468   */
2469   if (ctype == IS_COLORING_GLOBAL) {
2470     PetscInt *garray;
2471     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2472     for (i=0; i<baij->B->cmap->n/bs; i++) {
2473       for (j=0; j<bs; j++) {
2474         garray[i*bs+j] = bs*baij->garray[i]+j;
2475       }
2476     }
2477     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2478     ierr = PetscFree(garray);CHKERRQ(ierr);
2479     CHKMEMQ;
2480     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2481     for (k=0; k<c->ncolors; k++) {
2482       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2483       for (l=0; l<c->nrows[k]; l++) {
2484         col = c->columnsforrow[k][l];
2485         if (col >= cstart && col < cend) {
2486           /* column is in diagonal block of matrix */
2487           colb = col - cstart;
2488         } else {
2489           /* column  is in "off-processor" part */
2490 #if defined (PETSC_USE_CTABLE)
2491           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2492           colb --;
2493 #else
2494           colb = baij->colmap[col] - 1;
2495 #endif
2496           colb = colb/bs;
2497           colb += cend - cstart;
2498         }
2499         c->vscaleforrow[k][l] = colb;
2500       }
2501     }
2502   } else if (ctype == IS_COLORING_GHOSTED) {
2503     /* Get gtol mapping */
2504     PetscInt N = mat->cmap->N, *gtol;
2505     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2506     for (i=0; i<N; i++) gtol[i] = -1;
2507     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2508 
2509     c->vscale = 0; /* will be created in MatFDColoringApply() */
2510     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2511     for (k=0; k<c->ncolors; k++) {
2512       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2513       for (l=0; l<c->nrows[k]; l++) {
2514         col = c->columnsforrow[k][l];      /* global column index */
2515         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2516       }
2517     }
2518     ierr = PetscFree(gtol);CHKERRQ(ierr);
2519   }
2520   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2521 
2522   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2523   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2524   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2525   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2526     CHKMEMQ;
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2532 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2533 {
2534   Mat            B;
2535   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2536   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2537   Mat_SeqAIJ     *b;
2538   PetscErrorCode ierr;
2539   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2540   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2541   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2542 
2543   PetscFunctionBegin;
2544   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2545   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2546 
2547   /* ----------------------------------------------------------------
2548      Tell every processor the number of nonzeros per row
2549   */
2550   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2551   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2552     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2553   }
2554   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2555   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2556   displs     = recvcounts + size;
2557   for (i=0; i<size; i++) {
2558     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2559     displs[i]     = A->rmap->range[i]/bs;
2560   }
2561 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2562   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2563 #else
2564   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2565 #endif
2566   /* ---------------------------------------------------------------
2567      Create the sequential matrix of the same type as the local block diagonal
2568   */
2569   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2570   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2571   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2572   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2573   b = (Mat_SeqAIJ *)B->data;
2574 
2575   /*--------------------------------------------------------------------
2576     Copy my part of matrix column indices over
2577   */
2578   sendcount  = ad->nz + bd->nz;
2579   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2580   a_jsendbuf = ad->j;
2581   b_jsendbuf = bd->j;
2582   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2583   cnt        = 0;
2584   for (i=0; i<n; i++) {
2585 
2586     /* put in lower diagonal portion */
2587     m = bd->i[i+1] - bd->i[i];
2588     while (m > 0) {
2589       /* is it above diagonal (in bd (compressed) numbering) */
2590       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2591       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2592       m--;
2593     }
2594 
2595     /* put in diagonal portion */
2596     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2597       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2598     }
2599 
2600     /* put in upper diagonal portion */
2601     while (m-- > 0) {
2602       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2603     }
2604   }
2605   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2606 
2607   /*--------------------------------------------------------------------
2608     Gather all column indices to all processors
2609   */
2610   for (i=0; i<size; i++) {
2611     recvcounts[i] = 0;
2612     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2613       recvcounts[i] += lens[j];
2614     }
2615   }
2616   displs[0]  = 0;
2617   for (i=1; i<size; i++) {
2618     displs[i] = displs[i-1] + recvcounts[i-1];
2619   }
2620 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2621   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2622 #else
2623   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2624 #endif
2625   /*--------------------------------------------------------------------
2626     Assemble the matrix into useable form (note numerical values not yet set)
2627   */
2628   /* set the b->ilen (length of each row) values */
2629   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2630   /* set the b->i indices */
2631   b->i[0] = 0;
2632   for (i=1; i<=A->rmap->N/bs; i++) {
2633     b->i[i] = b->i[i-1] + lens[i-1];
2634   }
2635   ierr = PetscFree(lens);CHKERRQ(ierr);
2636   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2637   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2638   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2639 
2640   if (A->symmetric) {
2641     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2642   } else if (A->hermitian) {
2643     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2644   } else if (A->structurally_symmetric) {
2645     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2646   }
2647   *newmat = B;
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatSOR_MPIBAIJ"
2653 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2654 {
2655   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2656   PetscErrorCode ierr;
2657   Vec            bb1 = 0;
2658 
2659   PetscFunctionBegin;
2660   if (flag == SOR_APPLY_UPPER) {
2661     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2662     PetscFunctionReturn(0);
2663   }
2664 
2665   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2666     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2667   }
2668 
2669   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2670     if (flag & SOR_ZERO_INITIAL_GUESS) {
2671       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2672       its--;
2673     }
2674 
2675     while (its--) {
2676       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2677       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2678 
2679       /* update rhs: bb1 = bb - B*x */
2680       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2681       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2682 
2683       /* local sweep */
2684       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2685     }
2686   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2687     if (flag & SOR_ZERO_INITIAL_GUESS) {
2688       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2689       its--;
2690     }
2691     while (its--) {
2692       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2693       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2694 
2695       /* update rhs: bb1 = bb - B*x */
2696       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2697       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2698 
2699       /* local sweep */
2700       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2701     }
2702   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2703     if (flag & SOR_ZERO_INITIAL_GUESS) {
2704       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2705       its--;
2706     }
2707     while (its--) {
2708       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2709       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2710 
2711       /* update rhs: bb1 = bb - B*x */
2712       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2713       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2714 
2715       /* local sweep */
2716       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2717     }
2718   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2719 
2720   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2728 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2729 {
2730   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 
2739 /* -------------------------------------------------------------------*/
2740 static struct _MatOps MatOps_Values = {
2741        MatSetValues_MPIBAIJ,
2742        MatGetRow_MPIBAIJ,
2743        MatRestoreRow_MPIBAIJ,
2744        MatMult_MPIBAIJ,
2745 /* 4*/ MatMultAdd_MPIBAIJ,
2746        MatMultTranspose_MPIBAIJ,
2747        MatMultTransposeAdd_MPIBAIJ,
2748        0,
2749        0,
2750        0,
2751 /*10*/ 0,
2752        0,
2753        0,
2754        MatSOR_MPIBAIJ,
2755        MatTranspose_MPIBAIJ,
2756 /*15*/ MatGetInfo_MPIBAIJ,
2757        MatEqual_MPIBAIJ,
2758        MatGetDiagonal_MPIBAIJ,
2759        MatDiagonalScale_MPIBAIJ,
2760        MatNorm_MPIBAIJ,
2761 /*20*/ MatAssemblyBegin_MPIBAIJ,
2762        MatAssemblyEnd_MPIBAIJ,
2763        MatSetOption_MPIBAIJ,
2764        MatZeroEntries_MPIBAIJ,
2765 /*24*/ MatZeroRows_MPIBAIJ,
2766        0,
2767        0,
2768        0,
2769        0,
2770 /*29*/ MatSetUp_MPIBAIJ,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*34*/ MatDuplicate_MPIBAIJ,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*39*/ MatAXPY_MPIBAIJ,
2781        MatGetSubMatrices_MPIBAIJ,
2782        MatIncreaseOverlap_MPIBAIJ,
2783        MatGetValues_MPIBAIJ,
2784        MatCopy_MPIBAIJ,
2785 /*44*/ 0,
2786        MatScale_MPIBAIJ,
2787        0,
2788        0,
2789        0,
2790 /*49*/ 0,
2791        0,
2792        0,
2793        0,
2794        0,
2795 /*54*/ MatFDColoringCreate_MPIBAIJ,
2796        0,
2797        MatSetUnfactored_MPIBAIJ,
2798        MatPermute_MPIBAIJ,
2799        MatSetValuesBlocked_MPIBAIJ,
2800 /*59*/ MatGetSubMatrix_MPIBAIJ,
2801        MatDestroy_MPIBAIJ,
2802        MatView_MPIBAIJ,
2803        0,
2804        0,
2805 /*64*/ 0,
2806        0,
2807        0,
2808        0,
2809        0,
2810 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2811        0,
2812        0,
2813        0,
2814        0,
2815 /*74*/ 0,
2816        MatFDColoringApply_BAIJ,
2817        0,
2818        0,
2819        0,
2820 /*79*/ 0,
2821        0,
2822        0,
2823        0,
2824        MatLoad_MPIBAIJ,
2825 /*84*/ 0,
2826        0,
2827        0,
2828        0,
2829        0,
2830 /*89*/ 0,
2831        0,
2832        0,
2833        0,
2834        0,
2835 /*94*/ 0,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*99*/ 0,
2841        0,
2842        0,
2843        0,
2844        0,
2845 /*104*/0,
2846        MatRealPart_MPIBAIJ,
2847        MatImaginaryPart_MPIBAIJ,
2848        0,
2849        0,
2850 /*109*/0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2856        0,
2857        MatGetGhosts_MPIBAIJ,
2858        0,
2859        0,
2860 /*119*/0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*124*/0,
2866        0,
2867        MatInvertBlockDiagonal_MPIBAIJ
2868 };
2869 
2870 EXTERN_C_BEGIN
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2873 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2874 {
2875   PetscFunctionBegin;
2876   *a = ((Mat_MPIBAIJ *)A->data)->A;
2877   PetscFunctionReturn(0);
2878 }
2879 EXTERN_C_END
2880 
2881 EXTERN_C_BEGIN
2882 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2883 EXTERN_C_END
2884 
2885 EXTERN_C_BEGIN
2886 #undef __FUNCT__
2887 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2888 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2889 {
2890   PetscInt       m,rstart,cstart,cend;
2891   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2892   const PetscInt *JJ=0;
2893   PetscScalar    *values=0;
2894   PetscErrorCode ierr;
2895 
2896   PetscFunctionBegin;
2897   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2898   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2899   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2900   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2901   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2902   m      = B->rmap->n/bs;
2903   rstart = B->rmap->rstart/bs;
2904   cstart = B->cmap->rstart/bs;
2905   cend   = B->cmap->rend/bs;
2906 
2907   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2908   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2909   for (i=0; i<m; i++) {
2910     nz = ii[i+1] - ii[i];
2911     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2912     nz_max = PetscMax(nz_max,nz);
2913     JJ  = jj + ii[i];
2914     for (j=0; j<nz; j++) {
2915       if (*JJ >= cstart) break;
2916       JJ++;
2917     }
2918     d = 0;
2919     for (; j<nz; j++) {
2920       if (*JJ++ >= cend) break;
2921       d++;
2922     }
2923     d_nnz[i] = d;
2924     o_nnz[i] = nz - d;
2925   }
2926   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2927   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2928 
2929   values = (PetscScalar*)V;
2930   if (!values) {
2931     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2932     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2933   }
2934   for (i=0; i<m; i++) {
2935     PetscInt          row    = i + rstart;
2936     PetscInt          ncols  = ii[i+1] - ii[i];
2937     const PetscInt    *icols = jj + ii[i];
2938     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2939     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2940   }
2941 
2942   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2943   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2944   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 EXTERN_C_END
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2952 /*@C
2953    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2954    (the default parallel PETSc format).
2955 
2956    Collective on MPI_Comm
2957 
2958    Input Parameters:
2959 +  A - the matrix
2960 .  bs - the block size
2961 .  i - the indices into j for the start of each local row (starts with zero)
2962 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2963 -  v - optional values in the matrix
2964 
2965    Level: developer
2966 
2967 .keywords: matrix, aij, compressed row, sparse, parallel
2968 
2969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2970 @*/
2971 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2972 {
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2977   PetscValidType(B,1);
2978   PetscValidLogicalCollectiveInt(B,bs,2);
2979   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 EXTERN_C_BEGIN
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2986 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2987 {
2988   Mat_MPIBAIJ    *b;
2989   PetscErrorCode ierr;
2990   PetscInt       i;
2991   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
2992 
2993   PetscFunctionBegin;
2994   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
2995   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
2996 
2997   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2998   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2999   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3000   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3001 
3002   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3003   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3004   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3005   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3006   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3007 
3008   if (d_nnz) {
3009     for (i=0; i<B->rmap->n/bs; i++) {
3010       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3011     }
3012   }
3013   if (o_nnz) {
3014     for (i=0; i<B->rmap->n/bs; i++) {
3015       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3016     }
3017   }
3018 
3019   b = (Mat_MPIBAIJ*)B->data;
3020   b->bs2 = bs*bs;
3021   b->mbs = B->rmap->n/bs;
3022   b->nbs = B->cmap->n/bs;
3023   b->Mbs = B->rmap->N/bs;
3024   b->Nbs = B->cmap->N/bs;
3025 
3026   for (i=0; i<=b->size; i++) {
3027     b->rangebs[i] = B->rmap->range[i]/bs;
3028   }
3029   b->rstartbs = B->rmap->rstart/bs;
3030   b->rendbs   = B->rmap->rend/bs;
3031   b->cstartbs = B->cmap->rstart/bs;
3032   b->cendbs   = B->cmap->rend/bs;
3033 
3034   if (!B->preallocated) {
3035     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3036     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3037     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3038     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3039     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3040     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3041     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3042     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3043     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3044   }
3045 
3046   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3047   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3048   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3049   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3050   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3051   B->preallocated = PETSC_TRUE;
3052   PetscFunctionReturn(0);
3053 }
3054 EXTERN_C_END
3055 
3056 EXTERN_C_BEGIN
3057 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3058 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3059 EXTERN_C_END
3060 
3061 
3062 EXTERN_C_BEGIN
3063 #undef __FUNCT__
3064 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3065 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3066 {
3067   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3068   PetscErrorCode ierr;
3069   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3070   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3071   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3072 
3073   PetscFunctionBegin;
3074   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3075   ii[0] = 0;
3076   CHKMEMQ;
3077   for (i=0; i<M; i++) {
3078     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3079     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3080     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3081     /* remove one from count of matrix has diagonal */
3082     for (j=id[i]; j<id[i+1]; j++) {
3083       if (jd[j] == i) {ii[i+1]--;break;}
3084     }
3085   CHKMEMQ;
3086   }
3087   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3088   cnt = 0;
3089   for (i=0; i<M; i++) {
3090     for (j=io[i]; j<io[i+1]; j++) {
3091       if (garray[jo[j]] > rstart) break;
3092       jj[cnt++] = garray[jo[j]];
3093   CHKMEMQ;
3094     }
3095     for (k=id[i]; k<id[i+1]; k++) {
3096       if (jd[k] != i) {
3097         jj[cnt++] = rstart + jd[k];
3098   CHKMEMQ;
3099       }
3100     }
3101     for (;j<io[i+1]; j++) {
3102       jj[cnt++] = garray[jo[j]];
3103   CHKMEMQ;
3104     }
3105   }
3106   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3107   PetscFunctionReturn(0);
3108 }
3109 EXTERN_C_END
3110 
3111 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3112 EXTERN_C_BEGIN
3113 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3114 EXTERN_C_END
3115 
3116 EXTERN_C_BEGIN
3117 #undef __FUNCT__
3118 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3119 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3120 {
3121   PetscErrorCode ierr;
3122   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3123   Mat            B;
3124   Mat_MPIAIJ     *b;
3125 
3126   PetscFunctionBegin;
3127   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3128 
3129   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3130   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3131   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3132   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3133   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3134   b = (Mat_MPIAIJ*) B->data;
3135 
3136   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3137   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3138   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3139   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3140   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3141   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3144   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3145   if (reuse == MAT_REUSE_MATRIX) {
3146     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3147   } else {
3148    *newmat = B;
3149   }
3150   PetscFunctionReturn(0);
3151 }
3152 EXTERN_C_END
3153 
3154 EXTERN_C_BEGIN
3155 #if defined(PETSC_HAVE_MUMPS)
3156 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3157 #endif
3158 EXTERN_C_END
3159 
3160 /*MC
3161    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3162 
3163    Options Database Keys:
3164 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3165 . -mat_block_size <bs> - set the blocksize used to store the matrix
3166 - -mat_use_hash_table <fact>
3167 
3168   Level: beginner
3169 
3170 .seealso: MatCreateMPIBAIJ
3171 M*/
3172 
3173 EXTERN_C_BEGIN
3174 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3175 EXTERN_C_END
3176 
3177 EXTERN_C_BEGIN
3178 #undef __FUNCT__
3179 #define __FUNCT__ "MatCreate_MPIBAIJ"
3180 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3181 {
3182   Mat_MPIBAIJ    *b;
3183   PetscErrorCode ierr;
3184   PetscBool      flg;
3185 
3186   PetscFunctionBegin;
3187   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3188   B->data = (void*)b;
3189 
3190   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3191   B->assembled  = PETSC_FALSE;
3192 
3193   B->insertmode = NOT_SET_VALUES;
3194   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3195   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3196 
3197   /* build local table of row and column ownerships */
3198   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3199 
3200   /* build cache for off array entries formed */
3201   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3202   b->donotstash  = PETSC_FALSE;
3203   b->colmap      = PETSC_NULL;
3204   b->garray      = PETSC_NULL;
3205   b->roworiented = PETSC_TRUE;
3206 
3207   /* stuff used in block assembly */
3208   b->barray       = 0;
3209 
3210   /* stuff used for matrix vector multiply */
3211   b->lvec         = 0;
3212   b->Mvctx        = 0;
3213 
3214   /* stuff for MatGetRow() */
3215   b->rowindices   = 0;
3216   b->rowvalues    = 0;
3217   b->getrowactive = PETSC_FALSE;
3218 
3219   /* hash table stuff */
3220   b->ht           = 0;
3221   b->hd           = 0;
3222   b->ht_size      = 0;
3223   b->ht_flag      = PETSC_FALSE;
3224   b->ht_fact      = 0;
3225   b->ht_total_ct  = 0;
3226   b->ht_insert_ct = 0;
3227 
3228   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3229   b->ijonly       = PETSC_FALSE;
3230 
3231   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3232     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3233     if (flg) {
3234       PetscReal fact = 1.39;
3235       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3236       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3237       if (fact <= 1.0) fact = 1.39;
3238       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3239       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3240     }
3241   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3242 
3243 #if defined(PETSC_HAVE_MUMPS)
3244   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3245 #endif
3246   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3247                                      "MatConvert_MPIBAIJ_MPIAdj",
3248                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3250                                      "MatConvert_MPIBAIJ_MPIAIJ",
3251                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3252   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3253                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3254                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3255   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3256                                      "MatStoreValues_MPIBAIJ",
3257                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3258   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3259                                      "MatRetrieveValues_MPIBAIJ",
3260                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3261   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3262                                      "MatGetDiagonalBlock_MPIBAIJ",
3263                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3265                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3266                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3268                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3269                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3271                                      "MatDiagonalScaleLocal_MPIBAIJ",
3272                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3274                                      "MatSetHashTableFactor_MPIBAIJ",
3275                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3277                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3278                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3279   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3280   PetscFunctionReturn(0);
3281 }
3282 EXTERN_C_END
3283 
3284 /*MC
3285    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3286 
3287    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3288    and MATMPIBAIJ otherwise.
3289 
3290    Options Database Keys:
3291 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3292 
3293   Level: beginner
3294 
3295 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3296 M*/
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3300 /*@C
3301    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3302    (block compressed row).  For good matrix assembly performance
3303    the user should preallocate the matrix storage by setting the parameters
3304    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3305    performance can be increased by more than a factor of 50.
3306 
3307    Collective on Mat
3308 
3309    Input Parameters:
3310 +  A - the matrix
3311 .  bs   - size of blockk
3312 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3313            submatrix  (same for all local rows)
3314 .  d_nnz - array containing the number of block nonzeros in the various block rows
3315            of the in diagonal portion of the local (possibly different for each block
3316            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3317            set it even if it is zero.
3318 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3319            submatrix (same for all local rows).
3320 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3321            off-diagonal portion of the local submatrix (possibly different for
3322            each block row) or PETSC_NULL.
3323 
3324    If the *_nnz parameter is given then the *_nz parameter is ignored
3325 
3326    Options Database Keys:
3327 +   -mat_block_size - size of the blocks to use
3328 -   -mat_use_hash_table <fact>
3329 
3330    Notes:
3331    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3332    than it must be used on all processors that share the object for that argument.
3333 
3334    Storage Information:
3335    For a square global matrix we define each processor's diagonal portion
3336    to be its local rows and the corresponding columns (a square submatrix);
3337    each processor's off-diagonal portion encompasses the remainder of the
3338    local matrix (a rectangular submatrix).
3339 
3340    The user can specify preallocated storage for the diagonal part of
3341    the local submatrix with either d_nz or d_nnz (not both).  Set
3342    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3343    memory allocation.  Likewise, specify preallocated storage for the
3344    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3345 
3346    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3347    the figure below we depict these three local rows and all columns (0-11).
3348 
3349 .vb
3350            0 1 2 3 4 5 6 7 8 9 10 11
3351           -------------------
3352    row 3  |  o o o d d d o o o o o o
3353    row 4  |  o o o d d d o o o o o o
3354    row 5  |  o o o d d d o o o o o o
3355           -------------------
3356 .ve
3357 
3358    Thus, any entries in the d locations are stored in the d (diagonal)
3359    submatrix, and any entries in the o locations are stored in the
3360    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3361    stored simply in the MATSEQBAIJ format for compressed row storage.
3362 
3363    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3364    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3365    In general, for PDE problems in which most nonzeros are near the diagonal,
3366    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3367    or you will get TERRIBLE performance; see the users' manual chapter on
3368    matrices.
3369 
3370    You can call MatGetInfo() to get information on how effective the preallocation was;
3371    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3372    You can also run with the option -info and look for messages with the string
3373    malloc in them to see if additional memory allocation was needed.
3374 
3375    Level: intermediate
3376 
3377 .keywords: matrix, block, aij, compressed row, sparse, parallel
3378 
3379 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3380 @*/
3381 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3382 {
3383   PetscErrorCode ierr;
3384 
3385   PetscFunctionBegin;
3386   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3387   PetscValidType(B,1);
3388   PetscValidLogicalCollectiveInt(B,bs,2);
3389   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 #undef __FUNCT__
3394 #define __FUNCT__ "MatCreateBAIJ"
3395 /*@C
3396    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3397    (block compressed row).  For good matrix assembly performance
3398    the user should preallocate the matrix storage by setting the parameters
3399    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3400    performance can be increased by more than a factor of 50.
3401 
3402    Collective on MPI_Comm
3403 
3404    Input Parameters:
3405 +  comm - MPI communicator
3406 .  bs   - size of blockk
3407 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3408            This value should be the same as the local size used in creating the
3409            y vector for the matrix-vector product y = Ax.
3410 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3411            This value should be the same as the local size used in creating the
3412            x vector for the matrix-vector product y = Ax.
3413 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3414 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3415 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3416            submatrix  (same for all local rows)
3417 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3418            of the in diagonal portion of the local (possibly different for each block
3419            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3420            and set it even if it is zero.
3421 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3422            submatrix (same for all local rows).
3423 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3424            off-diagonal portion of the local submatrix (possibly different for
3425            each block row) or PETSC_NULL.
3426 
3427    Output Parameter:
3428 .  A - the matrix
3429 
3430    Options Database Keys:
3431 +   -mat_block_size - size of the blocks to use
3432 -   -mat_use_hash_table <fact>
3433 
3434    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3435    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3436    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3437 
3438    Notes:
3439    If the *_nnz parameter is given then the *_nz parameter is ignored
3440 
3441    A nonzero block is any block that as 1 or more nonzeros in it
3442 
3443    The user MUST specify either the local or global matrix dimensions
3444    (possibly both).
3445 
3446    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3447    than it must be used on all processors that share the object for that argument.
3448 
3449    Storage Information:
3450    For a square global matrix we define each processor's diagonal portion
3451    to be its local rows and the corresponding columns (a square submatrix);
3452    each processor's off-diagonal portion encompasses the remainder of the
3453    local matrix (a rectangular submatrix).
3454 
3455    The user can specify preallocated storage for the diagonal part of
3456    the local submatrix with either d_nz or d_nnz (not both).  Set
3457    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3458    memory allocation.  Likewise, specify preallocated storage for the
3459    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3460 
3461    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3462    the figure below we depict these three local rows and all columns (0-11).
3463 
3464 .vb
3465            0 1 2 3 4 5 6 7 8 9 10 11
3466           -------------------
3467    row 3  |  o o o d d d o o o o o o
3468    row 4  |  o o o d d d o o o o o o
3469    row 5  |  o o o d d d o o o o o o
3470           -------------------
3471 .ve
3472 
3473    Thus, any entries in the d locations are stored in the d (diagonal)
3474    submatrix, and any entries in the o locations are stored in the
3475    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3476    stored simply in the MATSEQBAIJ format for compressed row storage.
3477 
3478    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3479    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3480    In general, for PDE problems in which most nonzeros are near the diagonal,
3481    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3482    or you will get TERRIBLE performance; see the users' manual chapter on
3483    matrices.
3484 
3485    Level: intermediate
3486 
3487 .keywords: matrix, block, aij, compressed row, sparse, parallel
3488 
3489 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3490 @*/
3491 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3492 {
3493   PetscErrorCode ierr;
3494   PetscMPIInt    size;
3495 
3496   PetscFunctionBegin;
3497   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3498   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3499   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3500   if (size > 1) {
3501     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3502     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3503   } else {
3504     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3505     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3506   }
3507   PetscFunctionReturn(0);
3508 }
3509 
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3512 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3513 {
3514   Mat            mat;
3515   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3516   PetscErrorCode ierr;
3517   PetscInt       len=0;
3518 
3519   PetscFunctionBegin;
3520   *newmat       = 0;
3521   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3522   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3523   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3524   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3525 
3526   mat->factortype   = matin->factortype;
3527   mat->preallocated = PETSC_TRUE;
3528   mat->assembled    = PETSC_TRUE;
3529   mat->insertmode   = NOT_SET_VALUES;
3530 
3531   a      = (Mat_MPIBAIJ*)mat->data;
3532   mat->rmap->bs  = matin->rmap->bs;
3533   a->bs2   = oldmat->bs2;
3534   a->mbs   = oldmat->mbs;
3535   a->nbs   = oldmat->nbs;
3536   a->Mbs   = oldmat->Mbs;
3537   a->Nbs   = oldmat->Nbs;
3538 
3539   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3540   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3541 
3542   a->size         = oldmat->size;
3543   a->rank         = oldmat->rank;
3544   a->donotstash   = oldmat->donotstash;
3545   a->roworiented  = oldmat->roworiented;
3546   a->rowindices   = 0;
3547   a->rowvalues    = 0;
3548   a->getrowactive = PETSC_FALSE;
3549   a->barray       = 0;
3550   a->rstartbs     = oldmat->rstartbs;
3551   a->rendbs       = oldmat->rendbs;
3552   a->cstartbs     = oldmat->cstartbs;
3553   a->cendbs       = oldmat->cendbs;
3554 
3555   /* hash table stuff */
3556   a->ht           = 0;
3557   a->hd           = 0;
3558   a->ht_size      = 0;
3559   a->ht_flag      = oldmat->ht_flag;
3560   a->ht_fact      = oldmat->ht_fact;
3561   a->ht_total_ct  = 0;
3562   a->ht_insert_ct = 0;
3563 
3564   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3565   if (oldmat->colmap) {
3566 #if defined (PETSC_USE_CTABLE)
3567   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3568 #else
3569   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3570   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3571   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3572 #endif
3573   } else a->colmap = 0;
3574 
3575   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3576     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3577     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3578     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3579   } else a->garray = 0;
3580 
3581   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3582   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3583   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3584   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3585   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3586 
3587   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3588   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3589   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3590   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3591   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3592   *newmat = mat;
3593 
3594   PetscFunctionReturn(0);
3595 }
3596 
3597 #undef __FUNCT__
3598 #define __FUNCT__ "MatLoad_MPIBAIJ"
3599 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3600 {
3601   PetscErrorCode ierr;
3602   int            fd;
3603   PetscInt       i,nz,j,rstart,rend;
3604   PetscScalar    *vals,*buf;
3605   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3606   MPI_Status     status;
3607   PetscMPIInt    rank,size,maxnz;
3608   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3609   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3610   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3611   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3612   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3613   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3614 
3615   PetscFunctionBegin;
3616   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3617     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3618   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3619 
3620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3622   if (!rank) {
3623     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3624     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3625     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3626   }
3627 
3628   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3629 
3630   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3631   M = header[1]; N = header[2];
3632 
3633   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3634   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3635   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3636 
3637   /* If global sizes are set, check if they are consistent with that given in the file */
3638   if (sizesset) {
3639     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3640   }
3641   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3642   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3643 
3644   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3645 
3646   /*
3647      This code adds extra rows to make sure the number of rows is
3648      divisible by the blocksize
3649   */
3650   Mbs        = M/bs;
3651   extra_rows = bs - M + bs*Mbs;
3652   if (extra_rows == bs) extra_rows = 0;
3653   else                  Mbs++;
3654   if (extra_rows && !rank) {
3655     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3656   }
3657 
3658   /* determine ownership of all rows */
3659   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3660     mbs        = Mbs/size + ((Mbs % size) > rank);
3661     m          = mbs*bs;
3662   } else { /* User set */
3663     m          = newmat->rmap->n;
3664     mbs        = m/bs;
3665   }
3666   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3667   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3668 
3669   /* process 0 needs enough room for process with most rows */
3670   if (!rank) {
3671     mmax = rowners[1];
3672     for (i=2; i<=size; i++) {
3673       mmax = PetscMax(mmax,rowners[i]);
3674     }
3675     mmax*=bs;
3676   } else mmax = m;
3677 
3678   rowners[0] = 0;
3679   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3680   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3681   rstart = rowners[rank];
3682   rend   = rowners[rank+1];
3683 
3684   /* distribute row lengths to all processors */
3685   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3686   if (!rank) {
3687     mend = m;
3688     if (size == 1) mend = mend - extra_rows;
3689     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3690     for (j=mend; j<m; j++) locrowlens[j] = 1;
3691     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3692     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3693     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3694     for (j=0; j<m; j++) {
3695       procsnz[0] += locrowlens[j];
3696     }
3697     for (i=1; i<size; i++) {
3698       mend = browners[i+1] - browners[i];
3699       if (i == size-1) mend = mend - extra_rows;
3700       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3701       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3702       /* calculate the number of nonzeros on each processor */
3703       for (j=0; j<browners[i+1]-browners[i]; j++) {
3704         procsnz[i] += rowlengths[j];
3705       }
3706       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3707     }
3708     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3709   } else {
3710     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3711   }
3712 
3713   if (!rank) {
3714     /* determine max buffer needed and allocate it */
3715     maxnz = procsnz[0];
3716     for (i=1; i<size; i++) {
3717       maxnz = PetscMax(maxnz,procsnz[i]);
3718     }
3719     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3720 
3721     /* read in my part of the matrix column indices  */
3722     nz     = procsnz[0];
3723     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3724     mycols = ibuf;
3725     if (size == 1)  nz -= extra_rows;
3726     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3727     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3728 
3729     /* read in every ones (except the last) and ship off */
3730     for (i=1; i<size-1; i++) {
3731       nz   = procsnz[i];
3732       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3733       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3734     }
3735     /* read in the stuff for the last proc */
3736     if (size != 1) {
3737       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3738       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3739       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3740       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3741     }
3742     ierr = PetscFree(cols);CHKERRQ(ierr);
3743   } else {
3744     /* determine buffer space needed for message */
3745     nz = 0;
3746     for (i=0; i<m; i++) {
3747       nz += locrowlens[i];
3748     }
3749     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3750     mycols = ibuf;
3751     /* receive message of column indices*/
3752     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3753     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3754     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3755   }
3756 
3757   /* loop over local rows, determining number of off diagonal entries */
3758   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3759   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3760   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3761   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3762   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3763   rowcount = 0; nzcount = 0;
3764   for (i=0; i<mbs; i++) {
3765     dcount  = 0;
3766     odcount = 0;
3767     for (j=0; j<bs; j++) {
3768       kmax = locrowlens[rowcount];
3769       for (k=0; k<kmax; k++) {
3770         tmp = mycols[nzcount++]/bs;
3771         if (!mask[tmp]) {
3772           mask[tmp] = 1;
3773           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3774           else masked1[dcount++] = tmp;
3775         }
3776       }
3777       rowcount++;
3778     }
3779 
3780     dlens[i]  = dcount;
3781     odlens[i] = odcount;
3782 
3783     /* zero out the mask elements we set */
3784     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3785     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3786   }
3787 
3788 
3789   if (!sizesset) {
3790     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3791   }
3792   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3793 
3794   if (!rank) {
3795     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3796     /* read in my part of the matrix numerical values  */
3797     nz = procsnz[0];
3798     vals = buf;
3799     mycols = ibuf;
3800     if (size == 1)  nz -= extra_rows;
3801     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3802     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3803 
3804     /* insert into matrix */
3805     jj      = rstart*bs;
3806     for (i=0; i<m; i++) {
3807       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3808       mycols += locrowlens[i];
3809       vals   += locrowlens[i];
3810       jj++;
3811     }
3812     /* read in other processors (except the last one) and ship out */
3813     for (i=1; i<size-1; i++) {
3814       nz   = procsnz[i];
3815       vals = buf;
3816       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3817       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3818     }
3819     /* the last proc */
3820     if (size != 1) {
3821       nz   = procsnz[i] - extra_rows;
3822       vals = buf;
3823       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3824       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3825       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3826     }
3827     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3828   } else {
3829     /* receive numeric values */
3830     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3831 
3832     /* receive message of values*/
3833     vals   = buf;
3834     mycols = ibuf;
3835     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3836 
3837     /* insert into matrix */
3838     jj      = rstart*bs;
3839     for (i=0; i<m; i++) {
3840       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3841       mycols += locrowlens[i];
3842       vals   += locrowlens[i];
3843       jj++;
3844     }
3845   }
3846   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3847   ierr = PetscFree(buf);CHKERRQ(ierr);
3848   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3849   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3850   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3851   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3852   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3853   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3854 
3855   PetscFunctionReturn(0);
3856 }
3857 
3858 #undef __FUNCT__
3859 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3860 /*@
3861    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3862 
3863    Input Parameters:
3864 .  mat  - the matrix
3865 .  fact - factor
3866 
3867    Not Collective, each process can use a different factor
3868 
3869    Level: advanced
3870 
3871   Notes:
3872    This can also be set by the command line option: -mat_use_hash_table <fact>
3873 
3874 .keywords: matrix, hashtable, factor, HT
3875 
3876 .seealso: MatSetOption()
3877 @*/
3878 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3879 {
3880   PetscErrorCode ierr;
3881 
3882   PetscFunctionBegin;
3883   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3884   PetscFunctionReturn(0);
3885 }
3886 
3887 EXTERN_C_BEGIN
3888 #undef __FUNCT__
3889 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3890 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3891 {
3892   Mat_MPIBAIJ *baij;
3893 
3894   PetscFunctionBegin;
3895   baij = (Mat_MPIBAIJ*)mat->data;
3896   baij->ht_fact = fact;
3897   PetscFunctionReturn(0);
3898 }
3899 EXTERN_C_END
3900 
3901 #undef __FUNCT__
3902 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3903 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3904 {
3905   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3906 
3907   PetscFunctionBegin;
3908   *Ad     = a->A;
3909   *Ao     = a->B;
3910   *colmap = a->garray;
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 /*
3915     Special version for direct calls from Fortran (to eliminate two function call overheads
3916 */
3917 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3918 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3919 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3920 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3921 #endif
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3925 /*@C
3926   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3927 
3928   Collective on Mat
3929 
3930   Input Parameters:
3931 + mat - the matrix
3932 . min - number of input rows
3933 . im - input rows
3934 . nin - number of input columns
3935 . in - input columns
3936 . v - numerical values input
3937 - addvin - INSERT_VALUES or ADD_VALUES
3938 
3939   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3940 
3941   Level: advanced
3942 
3943 .seealso:   MatSetValuesBlocked()
3944 @*/
3945 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3946 {
3947   /* convert input arguments to C version */
3948   Mat             mat = *matin;
3949   PetscInt        m = *min, n = *nin;
3950   InsertMode      addv = *addvin;
3951 
3952   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3953   const MatScalar *value;
3954   MatScalar       *barray=baij->barray;
3955   PetscBool       roworiented = baij->roworiented;
3956   PetscErrorCode  ierr;
3957   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3958   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3959   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3960 
3961   PetscFunctionBegin;
3962   /* tasks normally handled by MatSetValuesBlocked() */
3963   if (mat->insertmode == NOT_SET_VALUES) {
3964     mat->insertmode = addv;
3965   }
3966 #if defined(PETSC_USE_DEBUG)
3967   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3968   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3969 #endif
3970   if (mat->assembled) {
3971     mat->was_assembled = PETSC_TRUE;
3972     mat->assembled     = PETSC_FALSE;
3973   }
3974   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3975 
3976 
3977   if (!barray) {
3978     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3979     baij->barray = barray;
3980   }
3981 
3982   if (roworiented) {
3983     stepval = (n-1)*bs;
3984   } else {
3985     stepval = (m-1)*bs;
3986   }
3987   for (i=0; i<m; i++) {
3988     if (im[i] < 0) continue;
3989 #if defined(PETSC_USE_DEBUG)
3990     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3991 #endif
3992     if (im[i] >= rstart && im[i] < rend) {
3993       row = im[i] - rstart;
3994       for (j=0; j<n; j++) {
3995         /* If NumCol = 1 then a copy is not required */
3996         if ((roworiented) && (n == 1)) {
3997           barray = (MatScalar*)v + i*bs2;
3998         } else if ((!roworiented) && (m == 1)) {
3999           barray = (MatScalar*)v + j*bs2;
4000         } else { /* Here a copy is required */
4001           if (roworiented) {
4002             value = v + i*(stepval+bs)*bs + j*bs;
4003           } else {
4004             value = v + j*(stepval+bs)*bs + i*bs;
4005           }
4006           for (ii=0; ii<bs; ii++,value+=stepval) {
4007             for (jj=0; jj<bs; jj++) {
4008               *barray++  = *value++;
4009             }
4010           }
4011           barray -=bs2;
4012         }
4013 
4014         if (in[j] >= cstart && in[j] < cend) {
4015           col  = in[j] - cstart;
4016           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4017         }
4018         else if (in[j] < 0) continue;
4019 #if defined(PETSC_USE_DEBUG)
4020         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4021 #endif
4022         else {
4023           if (mat->was_assembled) {
4024             if (!baij->colmap) {
4025               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4026             }
4027 
4028 #if defined(PETSC_USE_DEBUG)
4029 #if defined (PETSC_USE_CTABLE)
4030             { PetscInt data;
4031               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4032               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4033             }
4034 #else
4035             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4036 #endif
4037 #endif
4038 #if defined (PETSC_USE_CTABLE)
4039             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4040             col  = (col - 1)/bs;
4041 #else
4042             col = (baij->colmap[in[j]] - 1)/bs;
4043 #endif
4044             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4045               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4046               col =  in[j];
4047             }
4048           }
4049           else col = in[j];
4050           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4051         }
4052       }
4053     } else {
4054       if (!baij->donotstash) {
4055         if (roworiented) {
4056           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4057         } else {
4058           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4059         }
4060       }
4061     }
4062   }
4063 
4064   /* task normally handled by MatSetValuesBlocked() */
4065   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4066   PetscFunctionReturn(0);
4067 }
4068 
4069 #undef __FUNCT__
4070 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4071 /*@
4072      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4073          CSR format the local rows.
4074 
4075    Collective on MPI_Comm
4076 
4077    Input Parameters:
4078 +  comm - MPI communicator
4079 .  bs - the block size, only a block size of 1 is supported
4080 .  m - number of local rows (Cannot be PETSC_DECIDE)
4081 .  n - This value should be the same as the local size used in creating the
4082        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4083        calculated if N is given) For square matrices n is almost always m.
4084 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4085 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4086 .   i - row indices
4087 .   j - column indices
4088 -   a - matrix values
4089 
4090    Output Parameter:
4091 .   mat - the matrix
4092 
4093    Level: intermediate
4094 
4095    Notes:
4096        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4097      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4098      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4099 
4100        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4101 
4102 .keywords: matrix, aij, compressed row, sparse, parallel
4103 
4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4105           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4106 @*/
4107 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4108 {
4109   PetscErrorCode ierr;
4110 
4111   PetscFunctionBegin;
4112   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4113   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4114   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4115   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4116   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4117   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
4120