xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 245f004be2b2fca55a62739415aedaade1b4b42e)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
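/*
   Editorial note: for each local row the routine below keeps the entry of largest absolute
   value, first scanning the diagonal (A) part and then the off-diagonal (B) part; when idx[]
   is requested, local column indices are translated back to global columns (via cmap->rstart
   for A and via garray for B).
*/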
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 EXTERN_C_BEGIN
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 EXTERN_C_END
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
70 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
77   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 /*
83      Local utility routine that creates a mapping from the global column
84    number to the local number in the off-diagonal part of the local
85    storage of the matrix.  This is done in a non-scalable way since the
86    length of colmap equals the global number of block columns.
87 */
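/*
   Editorial sketch (not part of the library) of how colmap is later consulted when translating
   a global point column gcol with block size bs; the table stores block offsets shifted by one,
   so zero means "block column not present in the off-diagonal part".  The names gcol, loc and
   lcol are illustrative only:

       PetscTableFind(baij->colmap,gcol/bs + 1,&loc);  loc = loc - 1;    (PETSC_USE_CTABLE)
       loc = baij->colmap[gcol/bs] - 1;                                  (array version)
       if (loc >= 0) lcol = loc + gcol % bs;        point column within the local B matrix
*/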
88 #undef __FUNCT__
89 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
90 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
91 {
92   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
93   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
94   PetscErrorCode ierr;
95   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
96 
97   PetscFunctionBegin;
98 #if defined(PETSC_USE_CTABLE)
99   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) {
101     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
102   }
103 #else
104   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
105   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
106   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
107   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
108 #endif
109   PetscFunctionReturn(0);
110 }
111 
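/*
   Editorial note: the two macros below insert a single point value into the diagonal (A) or
   off-diagonal (B) SeqBAIJ part of the local matrix.  A short bisection narrows the block row
   down to a few candidate block columns, a linear scan then locates (or creates) the block,
   and MatSeqXAIJReallocateAIJ() grows the row when no preallocated space remains.
*/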
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113   { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118     bcol = col/bs; \
119     ridx = row % bs; cidx = col % bs; \
120     low  = 0; high = nrow; \
121     while (high-low > 3) { \
122       t = (low+high)/2; \
123       if (rp[t] > bcol) high = t; \
124       else              low  = t; \
125     } \
126     for (_i=low; _i<high; _i++) { \
127       if (rp[_i] > bcol) break; \
128       if (rp[_i] == bcol) { \
129         bap = ap +  bs2*_i + bs*cidx + ridx; \
130         if (addv == ADD_VALUES) *bap += value;  \
131         else                    *bap  = value;  \
132         goto a_noinsert; \
133       } \
134     } \
135     if (a->nonew == 1) goto a_noinsert; \
136     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138     N = nrow++ - 1;  \
139     /* shift up all the later entries in this row */ \
140     for (ii=N; ii>=_i; ii--) { \
141       rp[ii+1] = rp[ii]; \
142       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143     } \
144     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145     rp[_i]                      = bcol;  \
146     ap[bs2*_i + bs*cidx + ridx] = value;  \
147 a_noinsert:; \
148     ailen[brow] = nrow; \
149   }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152   { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156     bcol = col/bs; \
157     ridx = row % bs; cidx = col % bs; \
158     low  = 0; high = nrow; \
159     while (high-low > 3) { \
160       t = (low+high)/2; \
161       if (rp[t] > bcol) high = t; \
162       else              low  = t; \
163     } \
164     for (_i=low; _i<high; _i++) { \
165       if (rp[_i] > bcol) break; \
166       if (rp[_i] == bcol) { \
167         bap = ap +  bs2*_i + bs*cidx + ridx; \
168         if (addv == ADD_VALUES) *bap += value;  \
169         else                    *bap  = value;  \
170         goto b_noinsert; \
171       } \
172     } \
173     if (b->nonew == 1) goto b_noinsert; \
174     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176     CHKMEMQ; \
177     N = nrow++ - 1;  \
178     /* shift up all the later entries in this row */ \
179     for (ii=N; ii>=_i; ii--) { \
180       rp[ii+1] = rp[ii]; \
181       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182     } \
183     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184     rp[_i]                      = bcol;  \
185     ap[bs2*_i + bs*cidx + ridx] = value;  \
186 b_noinsert:; \
187     bilen[brow] = nrow; \
188   }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscBool      roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap->rstart;
200   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
202 
203   /* Some variables required by the MatSetValues_SeqBAIJ_*_Private() macros below */
204   Mat         A     = baij->A;
205   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar   *aa   =a->a;
208 
209   Mat         B     = baij->B;
210   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar   *ba   =b->a;
213 
214   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar *ap,*bap;
217 
218   PetscFunctionBegin;
219   if (v) PetscValidScalarPointer(v,6);
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224 #endif
225     if (im[i] >= rstart_orig && im[i] < rend_orig) {
226       row = im[i] - rstart_orig;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart_orig && in[j] < cend_orig) {
229           col = in[j] - cstart_orig;
230           if (roworiented) value = v[i*n+j];
231           else             value = v[i+j*m];
232           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         } else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_DEBUG)
236         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!baij->colmap) {
241               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined(PETSC_USE_CTABLE)
244             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
245             col  = col - 1;
246 #else
247             col = baij->colmap[in[j]/bs] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
250               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
251               col  =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
253               B    = baij->B;
254               b    = (Mat_SeqBAIJ*)(B)->data;
255               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
256               ba   =b->a;
257             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
258             else col += in[j]%bs;
259           } else col = in[j];
260           if (roworiented) value = v[i*n+j];
261           else             value = v[i+j*m];
262           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
263           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
264         }
265       }
266     } else {
267       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
268       if (!baij->donotstash) {
269         mat->assembled = PETSC_FALSE;
270         if (roworiented) {
271           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
272         } else {
273           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
274         }
275       }
276     }
277   }
278   PetscFunctionReturn(0);
279 }
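/*
   Editorial usage sketch for the public interface (C is assumed to be an already preallocated
   MPIBAIJ matrix with block size 2); values are addressed with point indices, and rows owned
   by another process are stashed until assembly:

       PetscInt    rows[2] = {0,1},cols[2] = {0,1};
       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};            row-oriented 2x2 dense insert
       ierr = MatSetValues(C,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/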
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
283 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
284 {
285   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
286   const PetscScalar *value;
287   MatScalar         *barray     = baij->barray;
288   PetscBool         roworiented = baij->roworiented;
289   PetscErrorCode    ierr;
290   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
291   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
292   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
293 
294   PetscFunctionBegin;
295   if (!barray) {
296     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
297     baij->barray = barray;
298   }
299 
300   if (roworiented) stepval = (n-1)*bs;
301   else stepval = (m-1)*bs;
302 
303   for (i=0; i<m; i++) {
304     if (im[i] < 0) continue;
305 #if defined(PETSC_USE_DEBUG)
306     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
307 #endif
308     if (im[i] >= rstart && im[i] < rend) {
309       row = im[i] - rstart;
310       for (j=0; j<n; j++) {
311         /* If only one block column (row-oriented) or one block row (column-oriented) is given, a copy is not required */
312         if ((roworiented) && (n == 1)) {
313           barray = (MatScalar*)v + i*bs2;
314         } else if ((!roworiented) && (m == 1)) {
315           barray = (MatScalar*)v + j*bs2;
316         } else { /* Here a copy is required */
317           if (roworiented) {
318             value = v + (i*(stepval+bs) + j)*bs;
319           } else {
320             value = v + (j*(stepval+bs) + i)*bs;
321           }
322           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
323             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
324             barray += bs;
325           }
326           barray -= bs2;
327         }
328 
329         if (in[j] >= cstart && in[j] < cend) {
330           col  = in[j] - cstart;
331           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
332         } else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined(PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined(PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col  =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
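/*
   Editorial usage sketch: with MatSetValuesBlocked() the row/column indices are BLOCK indices
   and v holds one bs x bs dense block per (row,col) pair, laid out row-oriented by default.
   Assuming bs = 2 and a single block at block position (0,0) of a preallocated MPIBAIJ matrix C:

       PetscInt    brow = 0,bcol = 0;
       PetscScalar block[4] = {1.0,2.0,
                               3.0,4.0};
       ierr = MatSetValuesBlocked(C,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/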
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
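/*
   Editorial note: the multiplicative hash above maps a (nonzero) key into [0,size) by taking
   the fractional part of key*HASH_KEY (the golden-ratio constant) and scaling it by the table
   size; tmp is only scratch storage for the intermediate real value.  For example, with
   size = 100 and key = 7, tmp = 4.3262..., so HASH(100,7,tmp) = (PetscInt)(100*0.3262...) = 32.
*/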
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up into the hash table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
427             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
435             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436           }
437         }
438 #endif
439         /* A hash table entry was found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else if (!baij->donotstash) {
444       if (roworiented) {
445         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
446       } else {
447         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
448       }
449     }
450   }
451 #if defined(PETSC_USE_DEBUG)
452   baij->ht_total_ct  = total_ct;
453   baij->ht_insert_ct = insert_ct;
454 #endif
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
461 {
462   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
463   PetscBool         roworiented = baij->roworiented;
464   PetscErrorCode    ierr;
465   PetscInt          i,j,ii,jj,row,col;
466   PetscInt          rstart=baij->rstartbs;
467   PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
468   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
469   PetscReal         tmp;
470   MatScalar         **HD = baij->hd,*baij_a;
471   const PetscScalar *v_t,*value;
472 #if defined(PETSC_USE_DEBUG)
473   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
474 #endif
475 
476   PetscFunctionBegin;
477   if (roworiented) stepval = (n-1)*bs;
478   else stepval = (m-1)*bs;
479 
480   for (i=0; i<m; i++) {
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
483     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
484 #endif
485     row = im[i];
486     v_t = v + i*nbs2;
487     if (row >= rstart && row < rend) {
488       for (j=0; j<n; j++) {
489         col = in[j];
490 
491         /* Look up into the Hash Table */
492         key = row*Nbs+col+1;
493         h1  = HASH(size,key,tmp);
494 
495         idx = h1;
496 #if defined(PETSC_USE_DEBUG)
497         total_ct++;
498         insert_ct++;
499         if (HT[idx] != key) {
500           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
501           if (idx == size) {
502             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
503             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
504           }
505         }
506 #else
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
511             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512           }
513         }
514 #endif
515         baij_a = HD[idx];
516         if (roworiented) {
517           /*value = v + i*(stepval+bs)*bs + j*bs;*/
518           /* value = v + (i*(stepval+bs)+j)*bs; */
519           value = v_t;
520           v_t  += bs;
521           if (addv == ADD_VALUES) {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] += *value++;
525               }
526             }
527           } else {
528             for (ii=0; ii<bs; ii++,value+=stepval) {
529               for (jj=ii; jj<bs2; jj+=bs) {
530                 baij_a[jj] = *value++;
531               }
532             }
533           }
534         } else {
535           value = v + j*(stepval+bs)*bs + i*bs;
536           if (addv == ADD_VALUES) {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] += *value++;
540               }
541             }
542           } else {
543             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
544               for (jj=0; jj<bs; jj++) {
545                 baij_a[jj] = *value++;
546               }
547             }
548           }
549         }
550       }
551     } else {
552       if (!baij->donotstash) {
553         if (roworiented) {
554           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
555         } else {
556           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
557         }
558       }
559     }
560   }
561 #if defined(PETSC_USE_DEBUG)
562   baij->ht_total_ct  = total_ct;
563   baij->ht_insert_ct = insert_ct;
564 #endif
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatGetValues_MPIBAIJ"
570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
571 {
572   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
573   PetscErrorCode ierr;
574   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
575   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
576 
577   PetscFunctionBegin;
578   for (i=0; i<m; i++) {
579     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
580     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
581     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
582       row = idxm[i] - bsrstart;
583       for (j=0; j<n; j++) {
584         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
585         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
586         if (idxn[j] >= bscstart && idxn[j] < bscend) {
587           col  = idxn[j] - bscstart;
588           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589         } else {
590           if (!baij->colmap) {
591             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
592           }
593 #if defined(PETSC_USE_CTABLE)
594           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
595           data--;
596 #else
597           data = baij->colmap[idxn[j]/bs]-1;
598 #endif
599           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
600           else {
601             col  = data + idxn[j]%bs;
602             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603           }
604         }
605       }
606     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatNorm_MPIBAIJ"
613 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
614 {
615   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
616   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
617   PetscErrorCode ierr;
618   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
619   PetscReal      sum = 0.0;
620   MatScalar      *v;
621 
622   PetscFunctionBegin;
623   if (baij->size == 1) {
624     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
625   } else {
626     if (type == NORM_FROBENIUS) {
627       v  = amat->a;
628       nz = amat->nz*bs2;
629       for (i=0; i<nz; i++) {
630         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
631       }
632       v  = bmat->a;
633       nz = bmat->nz*bs2;
634       for (i=0; i<nz; i++) {
635         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
636       }
637       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
638       *nrm = PetscSqrtReal(*nrm);
639     } else if (type == NORM_1) { /* max column sum */
640       PetscReal *tmp,*tmp2;
641       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
642       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
643       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
644       v    = amat->a; jj = amat->j;
645       for (i=0; i<amat->nz; i++) {
646         for (j=0; j<bs; j++) {
647           col = bs*(cstart + *jj) + j; /* column index */
648           for (row=0; row<bs; row++) {
649             tmp[col] += PetscAbsScalar(*v);  v++;
650           }
651         }
652         jj++;
653       }
654       v = bmat->a; jj = bmat->j;
655       for (i=0; i<bmat->nz; i++) {
656         for (j=0; j<bs; j++) {
657           col = bs*garray[*jj] + j;
658           for (row=0; row<bs; row++) {
659             tmp[col] += PetscAbsScalar(*v); v++;
660           }
661         }
662         jj++;
663       }
664       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
665       *nrm = 0.0;
666       for (j=0; j<mat->cmap->N; j++) {
667         if (tmp2[j] > *nrm) *nrm = tmp2[j];
668       }
669       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
670     } else if (type == NORM_INFINITY) { /* max row sum */
671       PetscReal *sums;
672       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
673       sum  = 0.0;
674       for (j=0; j<amat->mbs; j++) {
675         for (row=0; row<bs; row++) sums[row] = 0.0;
676         v  = amat->a + bs2*amat->i[j];
677         nz = amat->i[j+1]-amat->i[j];
678         for (i=0; i<nz; i++) {
679           for (col=0; col<bs; col++) {
680             for (row=0; row<bs; row++) {
681               sums[row] += PetscAbsScalar(*v); v++;
682             }
683           }
684         }
685         v  = bmat->a + bs2*bmat->i[j];
686         nz = bmat->i[j+1]-bmat->i[j];
687         for (i=0; i<nz; i++) {
688           for (col=0; col<bs; col++) {
689             for (row=0; row<bs; row++) {
690               sums[row] += PetscAbsScalar(*v); v++;
691             }
692           }
693         }
694         for (row=0; row<bs; row++) {
695           if (sums[row] > sum) sum = sums[row];
696         }
697       }
698       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
699       ierr = PetscFree(sums);CHKERRQ(ierr);
700     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 /*
706   Creates and populates the hash table.
707   This table is created only once.
708   If new entries need to be added to the matrix
709   then the hash table has to be destroyed and
710   recreated.
711 */
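/*
   Editorial note: each nonzero block at global block position (brow,bcol) is encoded as
   key = brow*Nbs + bcol + 1 (the +1 keeps 0 free to mean "empty slot"); collisions are resolved
   by linear probing, and HD[idx] ends up pointing at the bs2 values of that block so that later
   MatSetValues calls can write directly into the matrix storage.
*/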
712 #undef __FUNCT__
713 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
714 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
715 {
716   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
717   Mat            A     = baij->A,B=baij->B;
718   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
719   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
720   PetscErrorCode ierr;
721   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
722   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
723   PetscInt       *HT,key;
724   MatScalar      **HD;
725   PetscReal      tmp;
726 #if defined(PETSC_USE_INFO)
727   PetscInt ct=0,max=0;
728 #endif
729 
730   PetscFunctionBegin;
731   if (baij->ht) PetscFunctionReturn(0);
732 
733   baij->ht_size = (PetscInt)(factor*nz);
734   ht_size       = baij->ht_size;
735 
736   /* Allocate Memory for Hash Table */
737   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
738   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
739   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
740   HD   = baij->hd;
741   HT   = baij->ht;
742 
743   /* Loop Over A */
744   for (i=0; i<a->mbs; i++) {
745     for (j=ai[i]; j<ai[i+1]; j++) {
746       row = i+rstart;
747       col = aj[j]+cstart;
748 
749       key = row*Nbs + col + 1;
750       h1  = HASH(ht_size,key,tmp);
751       for (k=0; k<ht_size; k++) {
752         if (!HT[(h1+k)%ht_size]) {
753           HT[(h1+k)%ht_size] = key;
754           HD[(h1+k)%ht_size] = a->a + j*bs2;
755           break;
756 #if defined(PETSC_USE_INFO)
757         } else {
758           ct++;
759 #endif
760         }
761       }
762 #if defined(PETSC_USE_INFO)
763       if (k> max) max = k;
764 #endif
765     }
766   }
767   /* Loop Over B */
768   for (i=0; i<b->mbs; i++) {
769     for (j=bi[i]; j<bi[i+1]; j++) {
770       row = i+rstart;
771       col = garray[bj[j]];
772       key = row*Nbs + col + 1;
773       h1  = HASH(ht_size,key,tmp);
774       for (k=0; k<ht_size; k++) {
775         if (!HT[(h1+k)%ht_size]) {
776           HT[(h1+k)%ht_size] = key;
777           HD[(h1+k)%ht_size] = b->a + j*bs2;
778           break;
779 #if defined(PETSC_USE_INFO)
780         } else {
781           ct++;
782 #endif
783         }
784       }
785 #if defined(PETSC_USE_INFO)
786       if (k> max) max = k;
787 #endif
788     }
789   }
790 
791   /* Print Summary */
792 #if defined(PETSC_USE_INFO)
793   for (i=0,j=0; i<ht_size; i++) {
794     if (HT[i]) j++;
795   }
796   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
797 #endif
798   PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
803 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
804 {
805   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
806   PetscErrorCode ierr;
807   PetscInt       nstash,reallocs;
808   InsertMode     addv;
809 
810   PetscFunctionBegin;
811   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
812 
813   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
814   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
815   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
822   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
823   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
829 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
830 {
831   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
832   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
833   PetscErrorCode ierr;
834   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
835   PetscInt       *row,*col;
836   PetscBool      r1,r2,r3,other_disassembled;
837   MatScalar      *val;
838   InsertMode     addv = mat->insertmode;
839   PetscMPIInt    n;
840 
841   PetscFunctionBegin;
842   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
843   if (!baij->donotstash && !mat->nooffprocentries) {
844     while (1) {
845       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
846       if (!flg) break;
847 
848       for (i=0; i<n;) {
849         /* Now identify the consecutive vals belonging to the same row */
850         for (j=i,rstart=row[j]; j<n; j++) {
851           if (row[j] != rstart) break;
852         }
853         if (j < n) ncols = j-i;
854         else       ncols = n-i;
855         /* Now assemble all these values with a single function call */
856         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
857         i    = j;
858       }
859     }
860     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
861     /* Now process the block-stash. Since the values are stashed column-oriented,
862        temporarily clear the roworiented flags, and after the MatSetValuesBlocked() calls
863        restore the original flags */
864     r1 = baij->roworiented;
865     r2 = a->roworiented;
866     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
867 
868     baij->roworiented = PETSC_FALSE;
869     a->roworiented    = PETSC_FALSE;
870 
871     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
872     while (1) {
873       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
874       if (!flg) break;
875 
876       for (i=0; i<n;) {
877         /* Now identify the consecutive vals belonging to the same row */
878         for (j=i,rstart=row[j]; j<n; j++) {
879           if (row[j] != rstart) break;
880         }
881         if (j < n) ncols = j-i;
882         else       ncols = n-i;
883         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
884         i    = j;
885       }
886     }
887     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
888 
889     baij->roworiented = r1;
890     a->roworiented    = r2;
891 
892     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
893   }
894 
895   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
896   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
897 
898   /* determine if any processor has disassembled; if so, we must
899      also disassemble ourselves, in order that we may reassemble. */
900   /*
901      if nonzero structure of submatrix B cannot change then we know that
902      no processor disassembled thus we can skip this stuff
903   */
904   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
905     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
906     if (mat->was_assembled && !other_disassembled) {
907       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
908     }
909   }
910 
911   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
912     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
913   }
914   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
915   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
916   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
917 
918 #if defined(PETSC_USE_INFO)
919   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
920     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
921 
922     baij->ht_total_ct  = 0;
923     baij->ht_insert_ct = 0;
924   }
925 #endif
926   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
928 
929     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
930     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
931   }
932 
933   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
934 
935   baij->rowvalues = 0;
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
941 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
942 {
943   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
944   PetscErrorCode    ierr;
945   PetscMPIInt       size = baij->size,rank = baij->rank;
946   PetscInt          bs   = mat->rmap->bs;
947   PetscBool         iascii,isdraw;
948   PetscViewer       sviewer;
949   PetscViewerFormat format;
950 
951   PetscFunctionBegin;
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
953   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
954   if (iascii) {
955     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
956     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
957       MatInfo info;
958       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
959       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
962                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
963       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
965       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
966       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
967       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
968       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
969       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
970       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
971       PetscFunctionReturn(0);
972     } else if (format == PETSC_VIEWER_ASCII_INFO) {
973       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
974       PetscFunctionReturn(0);
975     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
976       PetscFunctionReturn(0);
977     }
978   }
979 
980   if (isdraw) {
981     PetscDraw draw;
982     PetscBool isnull;
983     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
984     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
985   }
986 
987   if (size == 1) {
988     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
989     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
990   } else {
991     /* assemble the entire matrix onto first processor. */
992     Mat         A;
993     Mat_SeqBAIJ *Aloc;
994     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
995     MatScalar   *a;
996 
997     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
998     /* Perhaps this should be the type of mat? */
999     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1000     if (!rank) {
1001       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1002     } else {
1003       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1004     }
1005     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1006     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1007     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1008     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1009 
1010     /* copy over the A part */
1011     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1012     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1013     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1014 
1015     for (i=0; i<mbs; i++) {
1016       rvals[0] = bs*(baij->rstartbs + i);
1017       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1018       for (j=ai[i]; j<ai[i+1]; j++) {
1019         col = (baij->cstartbs+aj[j])*bs;
1020         for (k=0; k<bs; k++) {
1021           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1022           col++; a += bs;
1023         }
1024       }
1025     }
1026     /* copy over the B part */
1027     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1028     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1029     for (i=0; i<mbs; i++) {
1030       rvals[0] = bs*(baij->rstartbs + i);
1031       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1032       for (j=ai[i]; j<ai[i+1]; j++) {
1033         col = baij->garray[aj[j]]*bs;
1034         for (k=0; k<bs; k++) {
1035           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1036           col++; a += bs;
1037         }
1038       }
1039     }
1040     ierr = PetscFree(rvals);CHKERRQ(ierr);
1041     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043     /*
1044        Every process has to participate in drawing the matrix since the graphics waits are
1045        synchronized across all processors that share the PetscDraw object
1046     */
1047     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     if (!rank) {
1049       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1050       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1051       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1052       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1053     }
1054     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1055     ierr = MatDestroy(&A);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1062 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1063 {
1064   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1065   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1066   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1069   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1070   int            fd;
1071   PetscScalar    *column_values;
1072   FILE           *file;
1073   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1074   PetscInt       message_count,flowcontrolcount;
1075 
1076   PetscFunctionBegin;
1077   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1078   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1079   nz   = bs2*(A->nz + B->nz);
1080   rlen = mat->rmap->n;
1081   if (!rank) {
1082     header[0] = MAT_FILE_CLASSID;
1083     header[1] = mat->rmap->N;
1084     header[2] = mat->cmap->N;
1085 
1086     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1087     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1088     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1089     /* get largest number of rows any processor has */
1090     range = mat->rmap->range;
1091     for (i=1; i<size; i++) {
1092       rlen = PetscMax(rlen,range[i+1] - range[i]);
1093     }
1094   } else {
1095     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096   }
1097 
1098   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1099   /* compute the number of nonzero blocks in each block row */
1100   for (i=0; i<a->mbs; i++) {
1101     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1102   }
1103   /* store the row lengths to the file */
1104   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1105   if (!rank) {
1106     MPI_Status status;
1107     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1108     rlen = (range[1] - range[0])/bs;
1109     for (i=0; i<rlen; i++) {
1110       for (j=0; j<bs; j++) {
1111         row_lens[i*bs+j] = bs*crow_lens[i];
1112       }
1113     }
1114     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1115     for (i=1; i<size; i++) {
1116       rlen = (range[i+1] - range[i])/bs;
1117       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1118       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1119       for (k=0; k<rlen; k++) {
1120         for (j=0; j<bs; j++) {
1121           row_lens[k*bs+j] = bs*crow_lens[k];
1122         }
1123       }
1124       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     }
1126     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1127     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1128   } else {
1129     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1130     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1131     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1132   }
1133   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1134 
1135   /* load up the local column indices. They are included for every point row, not just once per block row, since process 0 does not
1136      have the information needed to expand a block row into its point rows. This does require more communication, but still not more
1137      than the communication needed for the nonzero values */
1138   nzmax = nz; /* space the largest processor needs */
1139   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1140   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1141   cnt   = 0;
1142   for (i=0; i<a->mbs; i++) {
1143     pcnt = cnt;
1144     for (j=B->i[i]; j<B->i[i+1]; j++) {
1145       if ((col = garray[B->j[j]]) > cstart) break;
1146       for (l=0; l<bs; l++) {
1147         column_indices[cnt++] = bs*col+l;
1148       }
1149     }
1150     for (k=A->i[i]; k<A->i[i+1]; k++) {
1151       for (l=0; l<bs; l++) {
1152         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1153       }
1154     }
1155     for (; j<B->i[i+1]; j++) {
1156       for (l=0; l<bs; l++) {
1157         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1158       }
1159     }
1160     len = cnt - pcnt;
1161     for (k=1; k<bs; k++) {
1162       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1163       cnt += len;
1164     }
1165   }
1166   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1167 
1168   /* store the columns to the file */
1169   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1170   if (!rank) {
1171     MPI_Status status;
1172     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     for (i=1; i<size; i++) {
1174       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1175       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1176       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1177       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     }
1179     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1180   } else {
1181     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1182     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1183     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1184     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1185   }
1186   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1187 
1188   /* load up the numerical values */
1189   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1190   cnt  = 0;
1191   for (i=0; i<a->mbs; i++) {
1192     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1193     for (j=B->i[i]; j<B->i[i+1]; j++) {
1194       if (garray[B->j[j]] > cstart) break;
1195       for (l=0; l<bs; l++) {
1196         for (ll=0; ll<bs; ll++) {
1197           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1198         }
1199       }
1200       cnt += bs;
1201     }
1202     for (k=A->i[i]; k<A->i[i+1]; k++) {
1203       for (l=0; l<bs; l++) {
1204         for (ll=0; ll<bs; ll++) {
1205           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1206         }
1207       }
1208       cnt += bs;
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         for (ll=0; ll<bs; ll++) {
1213           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1214         }
1215       }
1216       cnt += bs;
1217     }
1218     cnt += (bs-1)*rlen;
1219   }
1220   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1221 
1222   /* store the column values to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1230       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1231       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1232     }
1233     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1234   } else {
1235     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1236     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1237     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1238     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1239   }
1240   ierr = PetscFree(column_values);CHKERRQ(ierr);
1241 
1242   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1243   if (file) {
1244     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatView_MPIBAIJ"
1251 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1252 {
1253   PetscErrorCode ierr;
1254   PetscBool      iascii,isdraw,issocket,isbinary;
1255 
1256   PetscFunctionBegin;
1257   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1258   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1259   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1260   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1261   if (iascii || isdraw || issocket) {
1262     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1263   } else if (isbinary) {
1264     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1271 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1272 {
1273   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277 #if defined(PETSC_USE_LOG)
1278   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1279 #endif
1280   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1281   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1282   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1283   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1284 #if defined(PETSC_USE_CTABLE)
1285   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1286 #else
1287   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1288 #endif
1289   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1290   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1292   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1293   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1294   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1295   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatMult_MPIBAIJ"
1313 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1314 {
1315   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317   PetscInt       nt;
1318 
1319   PetscFunctionBegin;
1320   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1321   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1322   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1323   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
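  /* The scatter of the needed off-process entries of xx into a->lvec is overlapped with the
     multiply by the diagonal block:  yy = A_d*xx  first, then  yy += B_o*lvec  below. */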
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1326   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1333 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1334 {
1335   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1341   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1342   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1348 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1349 {
1350   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352   PetscBool      merged;
1353 
1354   PetscFunctionBegin;
1355   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
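  /* yy receives A_d^T*xx plus a reverse scatter (ADD_VALUES) of the off-diagonal
     contributions B_o^T*xx computed into a->lvec; whether the local multiply is issued
     before or after the scatter Begin() depends on whether begin/end are merged. */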
1356   /* do nondiagonal part */
1357   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1358   if (!merged) {
1359     /* send it on its way */
1360     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* receive remote parts: note this assumes the values are not actually */
1364     /* inserted in yy until the next line */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   } else {
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* the values were actually received in the Begin(), but we still need to call the End() as a no-op */
1372     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1379 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1380 {
1381   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1382   PetscErrorCode ierr;
1383 
1384   PetscFunctionBegin;
1385   /* do nondiagonal part */
1386   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1387   /* send it on its way */
1388   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   /* do local part */
1390   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1391   /* receive remote parts: note this assumes the values are not actually */
1392   /* inserted in zz until the next line, which is true for the current   */
1393   /* implementation but perhaps not always. */
1394   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 /*
1399   This only works correctly for square matrices where the subblock A->A is the
1400    diagonal block
1401 */
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1404 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1405 {
1406   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1411   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatScale_MPIBAIJ"
1417 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1418 {
1419   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1424   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1430 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1431 {
1432   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1433   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1434   PetscErrorCode ierr;
1435   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1436   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1437   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1438 
1439   PetscFunctionBegin;
1440   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1441   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1442   mat->getrowactive = PETSC_TRUE;
1443 
1444   if (!mat->rowvalues && (idx || v)) {
1445     /*
1446         allocate enough space to hold information from the longest row.
1447     */
1448     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1449     PetscInt    max = 1,mbs = mat->mbs,tmp;
1450     for (i=0; i<mbs; i++) {
1451       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1452       if (max < tmp) max = tmp;
1453     }
1454     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1455   }
1456   lrow = row - brstart;
1457 
1458   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1459   if (!v)   {pvA = 0; pvB = 0;}
1460   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1461   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1462   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1463   nztot = nzA + nzB;
1464 
1465   cmap = mat->garray;
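  /* cmap (= garray) maps block columns of the off-diagonal part B to global block columns,
     so a point column c of B has global point column cmap[c/bs]*bs + c%bs */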
1466   if (v  || idx) {
1467     if (nztot) {
1468       /* Sort by increasing column numbers, assuming A and B already sorted */
1469       PetscInt imark = -1;
1470       if (v) {
1471         *v = v_p = mat->rowvalues;
1472         for (i=0; i<nzB; i++) {
1473           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1474           else break;
1475         }
1476         imark = i;
1477         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1478         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1479       }
1480       if (idx) {
1481         *idx = idx_p = mat->rowindices;
1482         if (imark > -1) {
1483           for (i=0; i<imark; i++) {
1484             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485           }
1486         } else {
1487           for (i=0; i<nzB; i++) {
1488             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1489             else break;
1490           }
1491           imark = i;
1492         }
1493         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1494         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495       }
1496     } else {
1497       if (idx) *idx = 0;
1498       if (v)   *v   = 0;
1499     }
1500   }
1501   *nz  = nztot;
1502   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1509 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1512 
1513   PetscFunctionBegin;
1514   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1515   baij->getrowactive = PETSC_FALSE;
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1521 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1522 {
1523   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1528   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1534 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1535 {
1536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1537   Mat            A  = a->A,B = a->B;
1538   PetscErrorCode ierr;
1539   PetscReal      isend[5],irecv[5];
1540 
1541   PetscFunctionBegin;
1542   info->block_size = (PetscReal)matin->rmap->bs;
1543 
1544   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1545 
1546   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1547   isend[3] = info->memory;  isend[4] = info->mallocs;
1548 
1549   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1550 
1551   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1552   isend[3] += info->memory;  isend[4] += info->mallocs;
1553 
1554   if (flag == MAT_LOCAL) {
1555     info->nz_used      = isend[0];
1556     info->nz_allocated = isend[1];
1557     info->nz_unneeded  = isend[2];
1558     info->memory       = isend[3];
1559     info->mallocs      = isend[4];
1560   } else if (flag == MAT_GLOBAL_MAX) {
1561     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1562 
1563     info->nz_used      = irecv[0];
1564     info->nz_allocated = irecv[1];
1565     info->nz_unneeded  = irecv[2];
1566     info->memory       = irecv[3];
1567     info->mallocs      = irecv[4];
1568   } else if (flag == MAT_GLOBAL_SUM) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1577   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1578   info->fill_ratio_needed = 0;
1579   info->factor_mallocs    = 0;
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1586 {
1587   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1588   PetscErrorCode ierr;
1589 
1590   PetscFunctionBegin;
1591   switch (op) {
1592   case MAT_NEW_NONZERO_LOCATIONS:
1593   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1594   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1595   case MAT_KEEP_NONZERO_PATTERN:
1596   case MAT_NEW_NONZERO_LOCATION_ERR:
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_ROW_ORIENTED:
1601     a->roworiented = flg;
1602 
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_NEW_DIAGONALS:
1607     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1608     break;
1609   case MAT_IGNORE_OFF_PROC_ENTRIES:
1610     a->donotstash = flg;
1611     break;
1612   case MAT_USE_HASH_TABLE:
1613     a->ht_flag = flg;
1614     break;
1615   case MAT_SYMMETRIC:
1616   case MAT_STRUCTURALLY_SYMMETRIC:
1617   case MAT_HERMITIAN:
1618   case MAT_SYMMETRY_ETERNAL:
1619     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1620     break;
1621   default:
1622     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1630 {
1631   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1632   Mat_SeqBAIJ    *Aloc;
1633   Mat            B;
1634   PetscErrorCode ierr;
1635   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1636   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1637   MatScalar      *a;
1638 
1639   PetscFunctionBegin;
1640   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1641   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1642     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1643     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1644     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1645     /* Do not know preallocation information, but must set block size */
1646     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1647   } else {
1648     B = *matout;
1649   }
1650 
1651   /* copy over the A part */
1652   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1653   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1654   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1655 
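  /* rvals[] holds the bs global point rows of block row i; below, every block of the
     diagonal (A) and off-diagonal (B) parts is re-inserted into the result matrix with
     the roles of rows and columns exchanged, one point column of the block (bs values of a)
     at a time */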
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = (baij->cstartbs+aj[j])*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663 
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   /* copy over the B part */
1669   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1670   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1671   for (i=0; i<mbs; i++) {
1672     rvals[0] = bs*(baij->rstartbs + i);
1673     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1674     for (j=ai[i]; j<ai[i+1]; j++) {
1675       col = baij->garray[aj[j]]*bs;
1676       for (k=0; k<bs; k++) {
1677         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1678         col++;
1679         a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1688   else {
1689     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1697 {
1698   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1699   Mat            a     = baij->A,b = baij->B;
1700   PetscErrorCode ierr;
1701   PetscInt       s1,s2,s3;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1705   if (rr) {
1706     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1707     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1708     /* Overlap communication with computation. */
1709     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   }
1711   if (ll) {
1712     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1713     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1714     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1715   }
1716   /* scale  the diagonal block */
1717   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1718 
1719   if (rr) {
1720     /* Do a scatter end and then right scale the off-diagonal block */
1721     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1722     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1730 {
1731   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1732   PetscErrorCode    ierr;
1733   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1734   PetscInt          i,*owners = A->rmap->range;
1735   PetscInt          *nprocs,j,idx,nsends,row;
1736   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1737   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1738   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1739   MPI_Comm          comm = ((PetscObject)A)->comm;
1740   MPI_Request       *send_waits,*recv_waits;
1741   MPI_Status        recv_status,*send_status;
1742   const PetscScalar *xx;
1743   PetscScalar       *bb;
1744 #if defined(PETSC_DEBUG)
1745   PetscBool found = PETSC_FALSE;
1746 #endif
1747 
1748   PetscFunctionBegin;
1749   /*  first count number of contributors to each processor */
1750   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1751   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1753   j    = 0;
1754   for (i=0; i<N; i++) {
1755     if (lastidx > (idx = rows[i])) j = 0;
1756     lastidx = idx;
1757     for (; j<size; j++) {
1758       if (idx >= owners[j] && idx < owners[j+1]) {
1759         nprocs[2*j]++;
1760         nprocs[2*j+1] = 1;
1761         owner[i]      = j;
1762 #if defined(PETSC_DEBUG)
1763         found = PETSC_TRUE;
1764 #endif
1765         break;
1766       }
1767     }
1768 #if defined(PETSC_DEBUG)
1769     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1770     found = PETSC_FALSE;
1771 #endif
1772   }
1773   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1774 
1775   if (A->nooffproczerorows) {
1776     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1777     nrecvs = nsends;
1778     nmax   = N;
1779   } else {
1780     /* inform other processors of number of messages and max length*/
1781     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1782   }
1783 
1784   /* post receives:   */
1785   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1786   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1787   for (i=0; i<nrecvs; i++) {
1788     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1789   }
1790 
1791   /* do sends:
1792      1) starts[i] gives the starting index in svalues for stuff going to
1793      the ith processor
1794   */
1795   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1796   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1797   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1798   starts[0] = 0;
1799   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1800   for (i=0; i<N; i++) {
1801     svalues[starts[owner[i]]++] = rows[i];
1802   }
1803 
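  /* starts[] was advanced while packing svalues above, so rebuild it (now including the
     final end marker) before posting the sends */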
1804   starts[0] = 0;
1805   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1806   count = 0;
1807   for (i=0; i<size; i++) {
1808     if (nprocs[2*i+1]) {
1809       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1810     }
1811   }
1812   ierr = PetscFree(starts);CHKERRQ(ierr);
1813 
1814   base = owners[rank];
1815 
1816   /*  wait on receives */
1817   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1818   count = nrecvs;
1819   slen  = 0;
1820   while (count) {
1821     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1822     /* unpack receives into our local space */
1823     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1824 
1825     source[imdex] = recv_status.MPI_SOURCE;
1826     lens[imdex]   = n;
1827     slen         += n;
1828     count--;
1829   }
1830   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1831 
1832   /* move the data into the send scatter */
1833   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1834   count = 0;
1835   for (i=0; i<nrecvs; i++) {
1836     values = rvalues + i*nmax;
1837     for (j=0; j<lens[i]; j++) {
1838       lrows[count++] = values[j] - base;
1839     }
1840   }
1841   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1842   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1843   ierr = PetscFree(owner);CHKERRQ(ierr);
1844   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1845 
1846   /* fix right hand side if needed */
1847   if (x && b) {
1848     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1849     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1850     for (i=0; i<slen; i++) {
1851       bb[lrows[i]] = diag*xx[lrows[i]];
1852     }
1853     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1854     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1855   }
1856 
1857   /* actually zap the local rows */
1858   /*
1859         Zero the required rows. If the "diagonal block" of the matrix
1860      is square and the user wishes to set the diagonal, we use separate
1861      code so that MatSetValues() is not called for each diagonal entry,
1862      which would allocate new memory on every call and slow things down
1863      with many mallocs.
1864   */
1865   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1866   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1867   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1868     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1869   } else if (diag != 0.0) {
1870     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1871     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1872        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1873     for (i=0; i<slen; i++) {
1874       row  = lrows[i] + rstart_bs;
1875       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1876     }
1877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879   } else {
1880     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1881   }
1882 
1883   ierr = PetscFree(lrows);CHKERRQ(ierr);
1884 
1885   /* wait on sends */
1886   if (nsends) {
1887     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1888     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1889     ierr = PetscFree(send_status);CHKERRQ(ierr);
1890   }
1891   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1892   ierr = PetscFree(svalues);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatEqual_MPIBAIJ"
1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1913 {
1914   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1915   Mat            a,b,c,d;
1916   PetscBool      flg;
1917   PetscErrorCode ierr;
1918 
1919   PetscFunctionBegin;
1920   a = matA->A; b = matA->B;
1921   c = matB->A; d = matB->B;
1922 
1923   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1924   if (flg) {
1925     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1926   }
1927   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatCopy_MPIBAIJ"
1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1934 {
1935   PetscErrorCode ierr;
1936   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1937   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1938 
1939   PetscFunctionBegin;
1940   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1941   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1942     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1943   } else {
1944     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1945     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1952 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1964 {
1965   PetscErrorCode ierr;
1966   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1967   PetscBLASInt   bnz,one=1;
1968   Mat_SeqBAIJ    *x,*y;
1969 
1970   PetscFunctionBegin;
1971   if (str == SAME_NONZERO_PATTERN) {
1972     PetscScalar alpha = a;
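    /* identical nonzero patterns: Y += a*X reduces to a BLAS axpy over the stored block
       entries of the diagonal (A) and off-diagonal (B) parts */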
1973     x    = (Mat_SeqBAIJ*)xx->A->data;
1974     y    = (Mat_SeqBAIJ*)yy->A->data;
1975     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1976     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1977     x    = (Mat_SeqBAIJ*)xx->B->data;
1978     y    = (Mat_SeqBAIJ*)yy->B->data;
1979     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1980     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1981   } else {
1982     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1989 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1990 {
1991   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1996   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2002 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2003 {
2004   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2005   PetscErrorCode ierr;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2009   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2015 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2016 {
2017   PetscErrorCode ierr;
2018   IS             iscol_local;
2019   PetscInt       csize;
2020 
2021   PetscFunctionBegin;
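  /* The gathered (sequential) column index set is cached on the submatrix under the name
     "ISAllGather" so that a MAT_REUSE_MATRIX call can recover it without repeating the
     all-gather.  A minimal usage sketch from the caller's side (illustrative only):

       Mat sub;
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... modify values of A, same nonzero pattern ...
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  */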
2022   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2023   if (call == MAT_REUSE_MATRIX) {
2024     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2025     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2026   } else {
2027     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2028   }
2029   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2030   if (call == MAT_INITIAL_MATRIX) {
2031     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2032     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
2036 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2039 /*
2040   Not great since it makes two copies of the submatrix: first a SeqBAIJ copy
2041   on each process, and then the end result obtained by concatenating the local
2042   matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2043 */
2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    rank,size;
2048   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2049   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2050   Mat            M,Mreuse;
2051   MatScalar      *vwork,*aa;
2052   MPI_Comm       comm = ((PetscObject)mat)->comm;
2053   IS             isrow_new, iscol_new;
2054   PetscBool      idflag,allrows, allcols;
2055   Mat_SeqBAIJ    *aij;
2056 
2057   PetscFunctionBegin;
2058   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2059   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2060   /* The index compression and expansion should be avoided. It does not
2061      report errors and might change the indices, hence it is buggy */
2062   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2063   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2064 
2065   /* Check for special case: each processor gets entire matrix columns */
2066   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2067   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2068   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2069   else allcols = PETSC_FALSE;
2070 
2071   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2072   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2073   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2074   else allrows = PETSC_FALSE;
2075 
2076   if (call ==  MAT_REUSE_MATRIX) {
2077     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2078     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2079     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2080   } else {
2081     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2082   }
2083   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2084   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2085   /*
2086       m - number of local rows
2087       n - number of columns (same on all processors)
2088       rstart - first row in new global matrix generated
2089   */
2090   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2091   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2092   m    = m/bs;
2093   n    = n/bs;
2094 
2095   if (call == MAT_INITIAL_MATRIX) {
2096     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2097     ii  = aij->i;
2098     jj  = aij->j;
2099 
2100     /*
2101         Determine the number of non-zeros in the diagonal and off-diagonal
2102         portions of the matrix in order to do correct preallocation
2103     */
2104 
2105     /* first get start and end of "diagonal" columns */
2106     if (csize == PETSC_DECIDE) {
2107       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2108       if (mglobal == n*bs) { /* square matrix */
2109         nlocal = m;
2110       } else {
2111         nlocal = n/size + ((n % size) > rank);
2112       }
2113     } else {
2114       nlocal = csize/bs;
2115     }
2116     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2117     rstart = rend - nlocal;
2118     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2119 
2120     /* next, compute all the lengths */
2121     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2122     olens = dlens + m;
2123     for (i=0; i<m; i++) {
2124       jend = ii[i+1] - ii[i];
2125       olen = 0;
2126       dlen = 0;
2127       for (j=0; j<jend; j++) {
2128         if (*jj < rstart || *jj >= rend) olen++;
2129         else dlen++;
2130         jj++;
2131       }
2132       olens[i] = olen;
2133       dlens[i] = dlen;
2134     }
2135     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2136     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2137     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2138     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2139     ierr = PetscFree(dlens);CHKERRQ(ierr);
2140   } else {
2141     PetscInt ml,nl;
2142 
2143     M    = *newmat;
2144     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2145     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2146     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2147     /*
2148          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below
2149        directly, rather than the slower MatSetValues().
2150     */
2151     M->was_assembled = PETSC_TRUE;
2152     M->assembled     = PETSC_FALSE;
2153   }
2154   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2155   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2156   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2157   ii   = aij->i;
2158   jj   = aij->j;
2159   aa   = aij->a;
2160   for (i=0; i<m; i++) {
2161     row   = rstart/bs + i;
2162     nz    = ii[i+1] - ii[i];
2163     cwork = jj;     jj += nz;
2164     vwork = aa;     aa += nz*bs*bs;
2165     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2166   }
2167 
2168   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2170   *newmat = M;
2171 
2172   /* save submatrix used in processor for next request */
2173   if (call ==  MAT_INITIAL_MATRIX) {
2174     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2175     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatPermute_MPIBAIJ"
2182 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2183 {
2184   MPI_Comm       comm,pcomm;
2185   PetscInt       first,rlocal_size,clocal_size,nrows;
2186   const PetscInt *rows;
2187   PetscMPIInt    size;
2188   IS             crowp,growp,irowp,lrowp,lcolp;
2189   PetscErrorCode ierr;
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2193   /* make a collective version of 'rowp' */
2194   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2195   if (pcomm==comm) {
2196     crowp = rowp;
2197   } else {
2198     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2199     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2200     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2201     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2202   }
2203   /* collect the global row permutation and invert it */
2204   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2205   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2206   if (pcomm!=comm) {
2207     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2208   }
2209   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2210   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2211   /* get the local target indices */
2212   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2213   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2214   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2215   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2216   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2217   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2218   /* the column permutation is much easier;
2219      gather a local copy of 'colp' if it is distributed */
2220   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2221   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2222   if (size==1) {
2223     lcolp = colp;
2224   } else {
2225     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2226   }
2227   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2228   /* now we just get the submatrix */
2229   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2230   if (size>1) {
2231     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2232   }
2233   /* clean up */
2234   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2240 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2241 {
2242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2243   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2244 
2245   PetscFunctionBegin;
2246   if (nghosts) *nghosts = B->nbs;
2247   if (ghosts) *ghosts = baij->garray;
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2255 /*
2256     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2257 */
2258 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2259 {
2260   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2261   PetscErrorCode         ierr;
2262   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2263   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2264   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2265   PetscInt               nis = iscoloring->n,nctot,*cols;
2266   PetscInt               *rowhit,M,cstart,cend,colb;
2267   PetscInt               *columnsforrow,l;
2268   IS                     *isa;
2269   PetscBool              done,flg;
2270   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2271   PetscInt               ctype=c->ctype;
2272 
2273   PetscFunctionBegin;
2274   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2275   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2276 
2277   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2278   else     ltog = NULL;
2279   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2280   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2281 
2282   M         = mat->rmap->n/bs;
2283   cstart    = mat->cmap->rstart/bs;
2284   cend      = mat->cmap->rend/bs;
2285   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2286   c->N      = mat->cmap->N/bs;
2287   c->m      = mat->rmap->n/bs;
2288   c->rstart = mat->rmap->rstart/bs;
2289 
2290   c->ncolors = nis;
2291   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2292   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2293   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2294   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2295   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2296   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2297 
2298   /* Allow access to data structures of local part of matrix */
2299   if (!baij->colmap) {
2300     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2301   }
2302   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2303   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2304 
2305   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2306   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2307 
2308   for (i=0; i<nis; i++) {
2309     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2310     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2311 
2312     c->ncolumns[i] = n;
2313     if (n) {
2314       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2315       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2316       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2317     } else {
2318       c->columns[i] = 0;
2319     }
2320 
2321     if (ctype == IS_COLORING_GLOBAL) {
2322       /* Determine the total (parallel) number of columns of this color */
2323       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2324       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2325 
2326       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2327       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2328       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2329       if (!nctot) {
2330         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2331       }
2332 
2333       disp[0] = 0;
2334       for (j=1; j<size; j++) {
2335         disp[j] = disp[j-1] + ncolsonproc[j-1];
2336       }
2337 
2338       /* Get complete list of columns for color on each processor */
2339       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2340       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2341       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2342     } else if (ctype == IS_COLORING_GHOSTED) {
2343       /* Determine local number of columns of this color on this process, including ghost points */
2344       nctot = n;
2345       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2346       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2347     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2348 
2349     /*
2350        Mark all rows affected by these columns
2351     */
2352     /* Temporary option to allow for debugging/testing */
2353     flg  = PETSC_FALSE;
2354     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2355     if (!flg) { /*-----------------------------------------------------------------------------*/
2356       /* crude, fast version */
2357       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
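      /* rowhit[r] = col+1 when global column 'col' of the current color has a nonzero in
         local block row r; 0 means the row is untouched by this color (the +1 offset
         distinguishes column 0 from "not hit") */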
2358       /* loop over columns*/
2359       for (j=0; j<nctot; j++) {
2360         if (ctype == IS_COLORING_GHOSTED) {
2361           col = ltog[cols[j]];
2362         } else {
2363           col = cols[j];
2364         }
2365         if (col >= cstart && col < cend) {
2366           /* column is in diagonal block of matrix */
2367           rows = A_cj + A_ci[col-cstart];
2368           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2369         } else {
2370 #if defined(PETSC_USE_CTABLE)
2371           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2372           colb--;
2373 #else
2374           colb = baij->colmap[col] - 1;
2375 #endif
2376           if (colb == -1) {
2377             m = 0;
2378           } else {
2379             colb = colb/bs;
2380             rows = B_cj + B_ci[colb];
2381             m    = B_ci[colb+1] - B_ci[colb];
2382           }
2383         }
2384         /* loop over the rows of this column, marking them in rowhit */
2385         for (k=0; k<m; k++) {
2386           rowhit[*rows++] = col + 1;
2387         }
2388       }
2389 
2390       /* count the number of hits */
2391       nrows = 0;
2392       for (j=0; j<M; j++) {
2393         if (rowhit[j]) nrows++;
2394       }
2395       c->nrows[i] = nrows;
2396       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2397       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2398       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2399       nrows       = 0;
2400       for (j=0; j<M; j++) {
2401         if (rowhit[j]) {
2402           c->rows[i][nrows]          = j;
2403           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2404           nrows++;
2405         }
2406       }
2407     } else { /*-------------------------------------------------------------------------------*/
2408       /* slow version, using rowhit as a linked list */
2409       PetscInt currentcol,fm,mfm;
2410       rowhit[M] = M;
2411       nrows     = 0;
2412       /* loop over columns*/
2413       for (j=0; j<nctot; j++) {
2414         if (ctype == IS_COLORING_GHOSTED) {
2415           col = ltog[cols[j]];
2416         } else {
2417           col = cols[j];
2418         }
2419         if (col >= cstart && col < cend) {
2420           /* column is in diagonal block of matrix */
2421           rows = A_cj + A_ci[col-cstart];
2422           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2423         } else {
2424 #if defined(PETSC_USE_CTABLE)
2425           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2426           colb--;
2427 #else
2428           colb = baij->colmap[col] - 1;
2429 #endif
2430           if (colb == -1) {
2431             m = 0;
2432           } else {
2433             colb = colb/bs;
2434             rows = B_cj + B_ci[colb];
2435             m    = B_ci[colb+1] - B_ci[colb];
2436           }
2437         }
2438 
2439         /* loop over the rows of this column, inserting each into the rowhit linked list */
2440         fm = M;    /* fm points to first entry in linked list */
2441         for (k=0; k<m; k++) {
2442           currentcol = *rows++;
2443           /* is it already in the list? */
2444           do {
2445             mfm = fm;
2446             fm  = rowhit[fm];
2447           } while (fm < currentcol);
2448           /* not in list so add it */
2449           if (fm != currentcol) {
2450             nrows++;
2451             columnsforrow[currentcol] = col;
2452             /* next three lines insert new entry into linked list */
2453             rowhit[mfm]        = currentcol;
2454             rowhit[currentcol] = fm;
2455             fm                 = currentcol;
2456             /* fm points to present position in list since we know the columns are sorted */
2457           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2458         }
2459       }
2460       c->nrows[i] = nrows;
2461       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2462       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2463       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2464       /* now store the linked list of rows into c->rows[i] */
2465       nrows = 0;
2466       fm    = rowhit[M];
2467       do {
2468         c->rows[i][nrows]            = fm;
2469         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2470         fm                           = rowhit[fm];
2471       } while (fm < M);
2472     } /* ---------------------------------------------------------------------------------------*/
2473     ierr = PetscFree(cols);CHKERRQ(ierr);
2474   }
2475 
2476   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2477   /*
2478        vscale will contain the on-processor ("diagonal" block) scalings followed by the off-processor ones
2479   */
2480   if (ctype == IS_COLORING_GLOBAL) {
2481     PetscInt *garray;
2482     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2483     for (i=0; i<baij->B->cmap->n/bs; i++) {
2484       for (j=0; j<bs; j++) {
2485         garray[i*bs+j] = bs*baij->garray[i]+j;
2486       }
2487     }
2488     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2489     ierr = PetscFree(garray);CHKERRQ(ierr);
2490     CHKMEMQ;
2491     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2492     for (k=0; k<c->ncolors; k++) {
2493       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2494       for (l=0; l<c->nrows[k]; l++) {
2495         col = c->columnsforrow[k][l];
2496         if (col >= cstart && col < cend) {
2497           /* column is in diagonal block of matrix */
2498           colb = col - cstart;
2499         } else {
2500           /* column  is in "off-processor" part */
2501 #if defined(PETSC_USE_CTABLE)
2502           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2503           colb--;
2504 #else
2505           colb = baij->colmap[col] - 1;
2506 #endif
2507           colb  = colb/bs;
2508           colb += cend - cstart;
2509         }
2510         c->vscaleforrow[k][l] = colb;
2511       }
2512     }
2513   } else if (ctype == IS_COLORING_GHOSTED) {
2514     /* Get gtol mapping */
2515     PetscInt N = mat->cmap->N,nlocal,*gtol;
2516     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2517     for (i=0; i<N; i++) gtol[i] = -1;
2518     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2519     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2520 
2521     c->vscale = 0; /* will be created in MatFDColoringApply() */
2522     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2523     for (k=0; k<c->ncolors; k++) {
2524       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2525       for (l=0; l<c->nrows[k]; l++) {
2526         col = c->columnsforrow[k][l];      /* global column index */
2527 
2528         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2529       }
2530     }
2531     ierr = PetscFree(gtol);CHKERRQ(ierr);
2532   }
2533   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2534 
2535   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2536   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2537   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2538   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2539   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2540   CHKMEMQ;
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2546 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2547 {
2548   Mat            B;
2549   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2550   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2551   Mat_SeqAIJ     *b;
2552   PetscErrorCode ierr;
2553   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2554   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2555   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2556 
2557   PetscFunctionBegin;
2558   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2559   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2560 
2561   /* ----------------------------------------------------------------
2562      Tell every processor the number of nonzeros per row
2563   */
2564   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2565   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2566     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2567   }
2568   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2569   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2570   displs    = recvcounts + size;
2571   for (i=0; i<size; i++) {
2572     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2573     displs[i]     = A->rmap->range[i]/bs;
2574   }
2575 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2576   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2577 #else
2578   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2579 #endif
2580   /* ---------------------------------------------------------------
2581      Create the sequential matrix of the same type as the local block diagonal
2582   */
2583   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2584   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2585   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2586   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2587   b    = (Mat_SeqAIJ*)B->data;
2588 
2589   /*--------------------------------------------------------------------
2590     Copy my part of matrix column indices over
2591   */
2592   sendcount  = ad->nz + bd->nz;
2593   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2594   a_jsendbuf = ad->j;
2595   b_jsendbuf = bd->j;
2596   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2597   cnt        = 0;
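  /* Each local block row is written out sorted by global block column: first the
     off-diagonal (B) entries whose global block column precedes the owned diagonal range,
     then the diagonal (A) entries, then the remaining off-diagonal entries */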
2598   for (i=0; i<n; i++) {
2599 
2600     /* put in lower diagonal portion */
2601     m = bd->i[i+1] - bd->i[i];
2602     while (m > 0) {
2603       /* is it above diagonal (in bd (compressed) numbering) */
2604       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2605       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2606       m--;
2607     }
2608 
2609     /* put in diagonal portion */
2610     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2611       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2612     }
2613 
2614     /* put in upper diagonal portion */
2615     while (m-- > 0) {
2616       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2617     }
2618   }
2619   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2620 
2621   /*--------------------------------------------------------------------
2622     Gather all column indices to all processors
2623   */
2624   for (i=0; i<size; i++) {
2625     recvcounts[i] = 0;
2626     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2627       recvcounts[i] += lens[j];
2628     }
2629   }
2630   displs[0] = 0;
2631   for (i=1; i<size; i++) {
2632     displs[i] = displs[i-1] + recvcounts[i-1];
2633   }
2634 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2635   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2636 #else
2637   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2638 #endif
2639   /*--------------------------------------------------------------------
2640     Assemble the matrix into usable form (note numerical values not yet set)
2641   */
2642   /* set the b->ilen (length of each row) values */
2643   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2644   /* set the b->i indices */
2645   b->i[0] = 0;
2646   for (i=1; i<=A->rmap->N/bs; i++) {
2647     b->i[i] = b->i[i-1] + lens[i-1];
2648   }
2649   ierr = PetscFree(lens);CHKERRQ(ierr);
2650   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2652   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2653 
2654   if (A->symmetric) {
2655     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2656   } else if (A->hermitian) {
2657     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2658   } else if (A->structurally_symmetric) {
2659     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2660   }
2661   *newmat = B;
2662   PetscFunctionReturn(0);
2663 }
2664 
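/*
   Note: only the SOR_LOCAL_* (processor-local, block Jacobi style) variants are supported in parallel.
   The coupling to off-process values, stored in the off-diagonal block B, is moved to the right-hand
   side (bb1 = bb - B*x, with the ghost values of x gathered into lvec) and a sequential SOR sweep is
   then applied to the local diagonal block A.
*/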
2665 #undef __FUNCT__
2666 #define __FUNCT__ "MatSOR_MPIBAIJ"
2667 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2668 {
2669   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2670   PetscErrorCode ierr;
2671   Vec            bb1 = 0;
2672 
2673   PetscFunctionBegin;
2674   if (flag == SOR_APPLY_UPPER) {
2675     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2676     PetscFunctionReturn(0);
2677   }
2678 
2679   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2680     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2681   }
2682 
2683   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2684     if (flag & SOR_ZERO_INITIAL_GUESS) {
2685       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2686       its--;
2687     }
2688 
2689     while (its--) {
2690       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2692 
2693       /* update rhs: bb1 = bb - B*x */
2694       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2695       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2696 
2697       /* local sweep */
2698       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2699     }
2700   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2701     if (flag & SOR_ZERO_INITIAL_GUESS) {
2702       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2703       its--;
2704     }
2705     while (its--) {
2706       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2708 
2709       /* update rhs: bb1 = bb - B*x */
2710       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2711       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2712 
2713       /* local sweep */
2714       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2715     }
2716   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2717     if (flag & SOR_ZERO_INITIAL_GUESS) {
2718       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2719       its--;
2720     }
2721     while (its--) {
2722       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2724 
2725       /* update rhs: bb1 = bb - B*x */
2726       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2727       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2728 
2729       /* local sweep */
2730       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2731     }
2732   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2733 
2734   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2742 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2743 {
2744   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 
2753 /* -------------------------------------------------------------------*/
2754 static struct _MatOps MatOps_Values = {
2755        MatSetValues_MPIBAIJ,
2756        MatGetRow_MPIBAIJ,
2757        MatRestoreRow_MPIBAIJ,
2758        MatMult_MPIBAIJ,
2759 /* 4*/ MatMultAdd_MPIBAIJ,
2760        MatMultTranspose_MPIBAIJ,
2761        MatMultTransposeAdd_MPIBAIJ,
2762        0,
2763        0,
2764        0,
2765 /*10*/ 0,
2766        0,
2767        0,
2768        MatSOR_MPIBAIJ,
2769        MatTranspose_MPIBAIJ,
2770 /*15*/ MatGetInfo_MPIBAIJ,
2771        MatEqual_MPIBAIJ,
2772        MatGetDiagonal_MPIBAIJ,
2773        MatDiagonalScale_MPIBAIJ,
2774        MatNorm_MPIBAIJ,
2775 /*20*/ MatAssemblyBegin_MPIBAIJ,
2776        MatAssemblyEnd_MPIBAIJ,
2777        MatSetOption_MPIBAIJ,
2778        MatZeroEntries_MPIBAIJ,
2779 /*24*/ MatZeroRows_MPIBAIJ,
2780        0,
2781        0,
2782        0,
2783        0,
2784 /*29*/ MatSetUp_MPIBAIJ,
2785        0,
2786        0,
2787        0,
2788        0,
2789 /*34*/ MatDuplicate_MPIBAIJ,
2790        0,
2791        0,
2792        0,
2793        0,
2794 /*39*/ MatAXPY_MPIBAIJ,
2795        MatGetSubMatrices_MPIBAIJ,
2796        MatIncreaseOverlap_MPIBAIJ,
2797        MatGetValues_MPIBAIJ,
2798        MatCopy_MPIBAIJ,
2799 /*44*/ 0,
2800        MatScale_MPIBAIJ,
2801        0,
2802        0,
2803        0,
2804 /*49*/ 0,
2805        0,
2806        0,
2807        0,
2808        0,
2809 /*54*/ MatFDColoringCreate_MPIBAIJ,
2810        0,
2811        MatSetUnfactored_MPIBAIJ,
2812        MatPermute_MPIBAIJ,
2813        MatSetValuesBlocked_MPIBAIJ,
2814 /*59*/ MatGetSubMatrix_MPIBAIJ,
2815        MatDestroy_MPIBAIJ,
2816        MatView_MPIBAIJ,
2817        0,
2818        0,
2819 /*64*/ 0,
2820        0,
2821        0,
2822        0,
2823        0,
2824 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2825        0,
2826        0,
2827        0,
2828        0,
2829 /*74*/ 0,
2830        MatFDColoringApply_BAIJ,
2831        0,
2832        0,
2833        0,
2834 /*79*/ 0,
2835        0,
2836        0,
2837        0,
2838        MatLoad_MPIBAIJ,
2839 /*84*/ 0,
2840        0,
2841        0,
2842        0,
2843        0,
2844 /*89*/ 0,
2845        0,
2846        0,
2847        0,
2848        0,
2849 /*94*/ 0,
2850        0,
2851        0,
2852        0,
2853        0,
2854 /*99*/ 0,
2855        0,
2856        0,
2857        0,
2858        0,
2859 /*104*/0,
2860        MatRealPart_MPIBAIJ,
2861        MatImaginaryPart_MPIBAIJ,
2862        0,
2863        0,
2864 /*109*/0,
2865        0,
2866        0,
2867        0,
2868        0,
2869 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2870        0,
2871        MatGetGhosts_MPIBAIJ,
2872        0,
2873        0,
2874 /*119*/0,
2875        0,
2876        0,
2877        0,
2878        0,
2879 /*124*/0,
2880        0,
2881        MatInvertBlockDiagonal_MPIBAIJ
2882 };
2883 
2884 EXTERN_C_BEGIN
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2887 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2888 {
2889   PetscFunctionBegin;
2890   *a = ((Mat_MPIBAIJ*)A->data)->A;
2891   PetscFunctionReturn(0);
2892 }
2893 EXTERN_C_END
2894 
2895 EXTERN_C_BEGIN
2896 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2897 EXTERN_C_END
2898 
2899 EXTERN_C_BEGIN
2900 #undef __FUNCT__
2901 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2902 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2903 {
2904   PetscInt       m,rstart,cstart,cend;
2905   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2906   const PetscInt *JJ    =0;
2907   PetscScalar    *values=0;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2912   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2913   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2914   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2915   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2916   m      = B->rmap->n/bs;
2917   rstart = B->rmap->rstart/bs;
2918   cstart = B->cmap->rstart/bs;
2919   cend   = B->cmap->rend/bs;
2920 
2921   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2922   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
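2922   /* Count, for each local block row, the block columns that fall inside [cstart,cend) (the diagonal
          block, d_nnz) and those that fall outside it (o_nnz); the column indices of each row are assumed
          to be sorted, as required by this routine. */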
2923   for (i=0; i<m; i++) {
2924     nz = ii[i+1] - ii[i];
2925     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2926     nz_max = PetscMax(nz_max,nz);
2927     JJ     = jj + ii[i];
2928     for (j=0; j<nz; j++) {
2929       if (*JJ >= cstart) break;
2930       JJ++;
2931     }
2932     d = 0;
2933     for (; j<nz; j++) {
2934       if (*JJ++ >= cend) break;
2935       d++;
2936     }
2937     d_nnz[i] = d;
2938     o_nnz[i] = nz - d;
2939   }
2940   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2941   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2942 
2943   values = (PetscScalar*)V;
2944   if (!values) {
2945     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2946     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2947   }
2948   for (i=0; i<m; i++) {
2949     PetscInt          row    = i + rstart;
2950     PetscInt          ncols  = ii[i+1] - ii[i];
2951     const PetscInt    *icols = jj + ii[i];
2952     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2953     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2954   }
2955 
2956   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2957   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2960   PetscFunctionReturn(0);
2961 }
2962 EXTERN_C_END
2963 
2964 #undef __FUNCT__
2965 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2966 /*@C
2967    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2968    (block compressed row storage).
2969 
2970    Collective on MPI_Comm
2971 
2972    Input Parameters:
2973 +  A - the matrix
2974 .  bs - the block size
2975 .  i - the indices into j for the start of each local row (starts with zero)
2976 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2977 -  v - optional values in the matrix
2978 
2979    Level: developer
2980 
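   Example usage (a minimal sketch; the block CSR data shown is purely illustrative, and v is omitted
   so that the numerical values can be set afterwards with MatSetValuesBlocked()):
.vb
   Mat      A;
   PetscInt bs   = 2;
   PetscInt ii[] = {0,2,3};    /* two local block rows with 2 and 1 block nonzeros */
   PetscInt jj[] = {0,1,1};    /* sorted global block column indices of each block row */

   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,NULL);CHKERRQ(ierr);
.ve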
2981 .keywords: matrix, aij, compressed row, sparse, parallel
2982 
2983 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2984 @*/
2985 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2986 {
2987   PetscErrorCode ierr;
2988 
2989   PetscFunctionBegin;
2990   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2991   PetscValidType(B,1);
2992   PetscValidLogicalCollectiveInt(B,bs,2);
2993   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 EXTERN_C_BEGIN
2998 #undef __FUNCT__
2999 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3000 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3001 {
3002   Mat_MPIBAIJ    *b;
3003   PetscErrorCode ierr;
3004   PetscInt       i;
3005   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3006 
3007   PetscFunctionBegin;
3008   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3009   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3010 
3011   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3012   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3013   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3014   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3015 
3016   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3017   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3018   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3019   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3020   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3021 
3022   if (d_nnz) {
3023     for (i=0; i<B->rmap->n/bs; i++) {
3024       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3025     }
3026   }
3027   if (o_nnz) {
3028     for (i=0; i<B->rmap->n/bs; i++) {
3029       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3030     }
3031   }
3032 
3033   b      = (Mat_MPIBAIJ*)B->data;
3034   b->bs2 = bs*bs;
3035   b->mbs = B->rmap->n/bs;
3036   b->nbs = B->cmap->n/bs;
3037   b->Mbs = B->rmap->N/bs;
3038   b->Nbs = B->cmap->N/bs;
3039 
3040   for (i=0; i<=b->size; i++) {
3041     b->rangebs[i] = B->rmap->range[i]/bs;
3042   }
3043   b->rstartbs = B->rmap->rstart/bs;
3044   b->rendbs   = B->rmap->rend/bs;
3045   b->cstartbs = B->cmap->rstart/bs;
3046   b->cendbs   = B->cmap->rend/bs;
3047 
3048   if (!B->preallocated) {
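  /* The parallel matrix is stored as two sequential BAIJ matrices: b->A holds the diagonal block
     (local rows by local columns) and b->B holds the off-diagonal part (local rows by all global
     columns; its column space is compressed later, during assembly). */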
3049     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3050     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3051     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3052     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3053     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3054     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3055     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3056     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3057     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3058   }
3059 
3060   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3061   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3062   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3063   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3064   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3065   B->preallocated = PETSC_TRUE;
3066   PetscFunctionReturn(0);
3067 }
3068 EXTERN_C_END
3069 
3070 EXTERN_C_BEGIN
3071 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3072 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3073 EXTERN_C_END
3074 
3075 
3076 EXTERN_C_BEGIN
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3079 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3080 {
3081   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3082   PetscErrorCode ierr;
3083   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3084   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3085   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3086 
3087   PetscFunctionBegin;
3088   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3089   ii[0] = 0;
3090   CHKMEMQ;
3091   for (i=0; i<M; i++) {
3092     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3093     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3094     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3095     /* remove one from the count if the row contains its diagonal entry (the adjacency structure stores no diagonal) */
3096     for (j=id[i]; j<id[i+1]; j++) {
3097       if (jd[j] == i) {ii[i+1]--;break;}
3098     }
3099     CHKMEMQ;
3100   }
3101   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3102   cnt  = 0;
3103   for (i=0; i<M; i++) {
3104     for (j=io[i]; j<io[i+1]; j++) {
3105       if (garray[jo[j]] > rstart) break;
3106       jj[cnt++] = garray[jo[j]];
3107       CHKMEMQ;
3108     }
3109     for (k=id[i]; k<id[i+1]; k++) {
3110       if (jd[k] != i) {
3111         jj[cnt++] = rstart + jd[k];
3112         CHKMEMQ;
3113       }
3114     }
3115     for (; j<io[i+1]; j++) {
3116       jj[cnt++] = garray[jo[j]];
3117       CHKMEMQ;
3118     }
3119   }
3120   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3121   PetscFunctionReturn(0);
3122 }
3123 EXTERN_C_END
3124 
3125 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3126 EXTERN_C_BEGIN
3127 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3128 EXTERN_C_END
3129 
3130 EXTERN_C_BEGIN
3131 #undef __FUNCT__
3132 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3133 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3134 {
3135   PetscErrorCode ierr;
3136   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3137   Mat            B;
3138   Mat_MPIAIJ     *b;
3139 
3140   PetscFunctionBegin;
3141   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3142 
3143   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3144   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3145   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3146   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3147   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3148   b    = (Mat_MPIAIJ*) B->data;
3149 
3150   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3151   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3152   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3153   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3154   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3155   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3156   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3157   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3158   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   if (reuse == MAT_REUSE_MATRIX) {
3160     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3161   } else {
3162    *newmat = B;
3163   }
3164   PetscFunctionReturn(0);
3165 }
3166 EXTERN_C_END
3167 
3168 EXTERN_C_BEGIN
3169 #if defined(PETSC_HAVE_MUMPS)
3170 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3171 #endif
3172 EXTERN_C_END
3173 
3174 /*MC
3175    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3176 
3177    Options Database Keys:
3178 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3179 . -mat_block_size <bs> - set the blocksize used to store the matrix
3180 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory; <fact> is the hash table expansion factor
3181 
3182   Level: beginner
3183 
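   Example usage (a minimal sketch; the sizes, block size and preallocation numbers are illustrative):
.vb
   Mat A;

   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
   Alternatively call MatSetFromOptions() and select the type at run time with -mat_type mpibaij.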
3184 .seealso: MatCreateMPIBAIJ
3185 M*/
3186 
3187 EXTERN_C_BEGIN
3188 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3189 EXTERN_C_END
3190 
3191 EXTERN_C_BEGIN
3192 #undef __FUNCT__
3193 #define __FUNCT__ "MatCreate_MPIBAIJ"
3194 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3195 {
3196   Mat_MPIBAIJ    *b;
3197   PetscErrorCode ierr;
3198   PetscBool      flg;
3199 
3200   PetscFunctionBegin;
3201   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3202   B->data = (void*)b;
3203 
3204   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3205   B->assembled = PETSC_FALSE;
3206 
3207   B->insertmode = NOT_SET_VALUES;
3208   ierr          = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3209   ierr          = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3210 
3211   /* build local table of row and column ownerships */
3212   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3213 
3214   /* build cache for off array entries formed */
3215   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3216 
3217   b->donotstash  = PETSC_FALSE;
3218   b->colmap      = NULL;
3219   b->garray      = NULL;
3220   b->roworiented = PETSC_TRUE;
3221 
3222   /* stuff used in block assembly */
3223   b->barray = 0;
3224 
3225   /* stuff used for matrix vector multiply */
3226   b->lvec  = 0;
3227   b->Mvctx = 0;
3228 
3229   /* stuff for MatGetRow() */
3230   b->rowindices   = 0;
3231   b->rowvalues    = 0;
3232   b->getrowactive = PETSC_FALSE;
3233 
3234   /* hash table stuff */
3235   b->ht           = 0;
3236   b->hd           = 0;
3237   b->ht_size      = 0;
3238   b->ht_flag      = PETSC_FALSE;
3239   b->ht_fact      = 0;
3240   b->ht_total_ct  = 0;
3241   b->ht_insert_ct = 0;
3242 
3243   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3244   b->ijonly = PETSC_FALSE;
3245 
3246   ierr = PetscOptionsBegin(((PetscObject)B)->comm,NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3247   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3248   if (flg) {
3249     PetscReal fact = 1.39;
3250     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3251     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3252     if (fact <= 1.0) fact = 1.39;
3253     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3254     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3255   }
3256   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3257 
3258 #if defined(PETSC_HAVE_MUMPS)
3259   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3260 #endif
3261   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3262                                            "MatConvert_MPIBAIJ_MPIAdj",
3263                                            MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3265                                            "MatConvert_MPIBAIJ_MPIAIJ",
3266                                            MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3268                                            "MatConvert_MPIBAIJ_MPISBAIJ",
3269                                            MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3271                                            "MatStoreValues_MPIBAIJ",
3272                                            MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3274                                            "MatRetrieveValues_MPIBAIJ",
3275                                            MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3277                                            "MatGetDiagonalBlock_MPIBAIJ",
3278                                            MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3280                                            "MatMPIBAIJSetPreallocation_MPIBAIJ",
3281                                            MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3283                                            "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3284                                            MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3286                                            "MatDiagonalScaleLocal_MPIBAIJ",
3287                                            MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3289                                            "MatSetHashTableFactor_MPIBAIJ",
3290                                            MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3292                                            "MatConvert_MPIBAIJ_MPIBSTRM",
3293                                            MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3294   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3295   PetscFunctionReturn(0);
3296 }
3297 EXTERN_C_END
3298 
3299 /*MC
3300    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3301 
3302    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3303    and MATMPIBAIJ otherwise.
3304 
3305    Options Database Keys:
3306 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3307 
3308   Level: beginner
3309 
3310 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
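   Example usage (a sketch; the BAIJ type resolves to the sequential or parallel variant according to
   the size of the communicator):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve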
3311 M*/
3312 
3313 #undef __FUNCT__
3314 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3315 /*@C
3316    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3317    (block compressed row).  For good matrix assembly performance
3318    the user should preallocate the matrix storage by setting the parameters
3319    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3320    performance can be increased by more than a factor of 50.
3321 
3322    Collective on Mat
3323 
3324    Input Parameters:
3325 +  A - the matrix
3326 .  bs   - size of block
3327 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3328            submatrix  (same for all local rows)
3329 .  d_nnz - array containing the number of block nonzeros in the various block rows
3330            in the diagonal portion of the local submatrix (possibly different for each block
3331            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3332            set it even if it is zero.
3333 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3334            submatrix (same for all local rows).
3335 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3336            off-diagonal portion of the local submatrix (possibly different for
3337            each block row) or NULL.
3338 
3339    If the *_nnz parameter is given then the *_nz parameter is ignored
3340 
3341    Options Database Keys:
3342 +   -mat_block_size - size of the blocks to use
3343 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory; <fact> is the hash table expansion factor
3344 
3345    Notes:
3346    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3347    then it must be used on all processors that share the object for that argument.
3348 
3349    Storage Information:
3350    For a square global matrix we define each processor's diagonal portion
3351    to be its local rows and the corresponding columns (a square submatrix);
3352    each processor's off-diagonal portion encompasses the remainder of the
3353    local matrix (a rectangular submatrix).
3354 
3355    The user can specify preallocated storage for the diagonal part of
3356    the local submatrix with either d_nz or d_nnz (not both).  Set
3357    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3358    memory allocation.  Likewise, specify preallocated storage for the
3359    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3360 
3361    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3362    the figure below we depict these three local rows and all columns (0-11).
3363 
3364 .vb
3365            0 1 2 3 4 5 6 7 8 9 10 11
3366           -------------------
3367    row 3  |  o o o d d d o o o o o o
3368    row 4  |  o o o d d d o o o o o o
3369    row 5  |  o o o d d d o o o o o o
3370           -------------------
3371 .ve
3372 
3373    Thus, any entries in the d locations are stored in the d (diagonal)
3374    submatrix, and any entries in the o locations are stored in the
3375    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3376    stored simply in the MATSEQBAIJ format for compressed row storage.
3377 
3378    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3379    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3380    In general, for PDE problems in which most nonzeros are near the diagonal,
3381    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3382    or you will get TERRIBLE performance; see the users' manual chapter on
3383    matrices.
3384 
3385    You can call MatGetInfo() to get information on how effective the preallocation was;
3386    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3387    You can also run with the option -info and look for messages with the string
3388    malloc in them to see if additional memory allocation was needed.
3389 
3390    Level: intermediate
3391 
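   Example usage (a minimal sketch; the sizes and preallocation numbers are illustrative):
.vb
   Mat      A;
   PetscInt bs = 3;

   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,90,90);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
   When the number of block nonzeros varies strongly from row to row, pass d_nnz and o_nnz arrays whose
   length equals the number of local block rows instead of the single d_nz/o_nz values.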
3392 .keywords: matrix, block, aij, compressed row, sparse, parallel
3393 
3394 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3395 @*/
3396 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3397 {
3398   PetscErrorCode ierr;
3399 
3400   PetscFunctionBegin;
3401   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3402   PetscValidType(B,1);
3403   PetscValidLogicalCollectiveInt(B,bs,2);
3404   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3405   PetscFunctionReturn(0);
3406 }
3407 
3408 #undef __FUNCT__
3409 #define __FUNCT__ "MatCreateBAIJ"
3410 /*@C
3411    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3412    (block compressed row).  For good matrix assembly performance
3413    the user should preallocate the matrix storage by setting the parameters
3414    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3415    performance can be increased by more than a factor of 50.
3416 
3417    Collective on MPI_Comm
3418 
3419    Input Parameters:
3420 +  comm - MPI communicator
3421 .  bs   - size of block
3422 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3423            This value should be the same as the local size used in creating the
3424            y vector for the matrix-vector product y = Ax.
3425 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3426            This value should be the same as the local size used in creating the
3427            x vector for the matrix-vector product y = Ax.
3428 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3429 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3430 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3431            submatrix  (same for all local rows)
3432 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3433            in the diagonal portion of the local submatrix (possibly different for each block
3434            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3435            and set it even if it is zero.
3436 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3437            submatrix (same for all local rows).
3438 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3439            off-diagonal portion of the local submatrix (possibly different for
3440            each block row) or NULL.
3441 
3442    Output Parameter:
3443 .  A - the matrix
3444 
3445    Options Database Keys:
3446 +   -mat_block_size - size of the blocks to use
3447 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory; <fact> is the hash table expansion factor
3448 
3449    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3450    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3451    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3452 
3453    Notes:
3454    If the *_nnz parameter is given then the *_nz parameter is ignored
3455 
3456    A nonzero block is any block that has 1 or more nonzeros in it
3457 
3458    The user MUST specify either the local or global matrix dimensions
3459    (possibly both).
3460 
3461    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3462    then it must be used on all processors that share the object for that argument.
3463 
3464    Storage Information:
3465    For a square global matrix we define each processor's diagonal portion
3466    to be its local rows and the corresponding columns (a square submatrix);
3467    each processor's off-diagonal portion encompasses the remainder of the
3468    local matrix (a rectangular submatrix).
3469 
3470    The user can specify preallocated storage for the diagonal part of
3471    the local submatrix with either d_nz or d_nnz (not both).  Set
3472    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3473    memory allocation.  Likewise, specify preallocated storage for the
3474    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3475 
3476    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3477    the figure below we depict these three local rows and all columns (0-11).
3478 
3479 .vb
3480            0 1 2 3 4 5 6 7 8 9 10 11
3481           -------------------
3482    row 3  |  o o o d d d o o o o o o
3483    row 4  |  o o o d d d o o o o o o
3484    row 5  |  o o o d d d o o o o o o
3485           -------------------
3486 .ve
3487 
3488    Thus, any entries in the d locations are stored in the d (diagonal)
3489    submatrix, and any entries in the o locations are stored in the
3490    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3491    stored simply in the MATSEQBAIJ format for compressed row storage.
3492 
3493    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3494    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3495    In general, for PDE problems in which most nonzeros are near the diagonal,
3496    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3497    or you will get TERRIBLE performance; see the users' manual chapter on
3498    matrices.
3499 
3500    Level: intermediate
3501 
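   Example usage (a minimal sketch; the sizes and preallocation numbers are illustrative):
.vb
   Mat A;

   ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValuesBlocked() ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve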
3502 .keywords: matrix, block, aij, compressed row, sparse, parallel
3503 
3504 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3505 @*/
3506 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3507 {
3508   PetscErrorCode ierr;
3509   PetscMPIInt    size;
3510 
3511   PetscFunctionBegin;
3512   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3513   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3514   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3515   if (size > 1) {
3516     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3517     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3518   } else {
3519     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3520     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3521   }
3522   PetscFunctionReturn(0);
3523 }
3524 
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3527 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3528 {
3529   Mat            mat;
3530   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3531   PetscErrorCode ierr;
3532   PetscInt       len=0;
3533 
3534   PetscFunctionBegin;
3535   *newmat = 0;
3536   ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3537   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3538   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3539   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3540 
3541   mat->factortype   = matin->factortype;
3542   mat->preallocated = PETSC_TRUE;
3543   mat->assembled    = PETSC_TRUE;
3544   mat->insertmode   = NOT_SET_VALUES;
3545 
3546   a             = (Mat_MPIBAIJ*)mat->data;
3547   mat->rmap->bs = matin->rmap->bs;
3548   a->bs2        = oldmat->bs2;
3549   a->mbs        = oldmat->mbs;
3550   a->nbs        = oldmat->nbs;
3551   a->Mbs        = oldmat->Mbs;
3552   a->Nbs        = oldmat->Nbs;
3553 
3554   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3555   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3556 
3557   a->size         = oldmat->size;
3558   a->rank         = oldmat->rank;
3559   a->donotstash   = oldmat->donotstash;
3560   a->roworiented  = oldmat->roworiented;
3561   a->rowindices   = 0;
3562   a->rowvalues    = 0;
3563   a->getrowactive = PETSC_FALSE;
3564   a->barray       = 0;
3565   a->rstartbs     = oldmat->rstartbs;
3566   a->rendbs       = oldmat->rendbs;
3567   a->cstartbs     = oldmat->cstartbs;
3568   a->cendbs       = oldmat->cendbs;
3569 
3570   /* hash table stuff */
3571   a->ht           = 0;
3572   a->hd           = 0;
3573   a->ht_size      = 0;
3574   a->ht_flag      = oldmat->ht_flag;
3575   a->ht_fact      = oldmat->ht_fact;
3576   a->ht_total_ct  = 0;
3577   a->ht_insert_ct = 0;
3578 
3579   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3580   if (oldmat->colmap) {
3581 #if defined(PETSC_USE_CTABLE)
3582     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3583 #else
3584     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3585     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3586     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3587 #endif
3588   } else a->colmap = 0;
3589 
3590   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3591     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3592     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3593     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3594   } else a->garray = 0;
3595 
3596   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3597   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3598   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3599   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3600   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3601 
3602   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3603   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3604   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3605   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3606   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3607   *newmat = mat;
3608   PetscFunctionReturn(0);
3609 }
3610 
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatLoad_MPIBAIJ"
3613 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3614 {
3615   PetscErrorCode ierr;
3616   int            fd;
3617   PetscInt       i,nz,j,rstart,rend;
3618   PetscScalar    *vals,*buf;
3619   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3620   MPI_Status     status;
3621   PetscMPIInt    rank,size,maxnz;
3622   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3623   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3624   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3625   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3626   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3627   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3628 
3629   PetscFunctionBegin;
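  /* Loading strategy: process 0 reads the binary file and ships the row lengths, column indices and
     numerical values to the other processes.  If the number of rows in the file is not divisible by
     the block size, extra rows with a unit diagonal are appended so that it is. */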
3630   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3631   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3632   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3633 
3634   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3635   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3636   if (!rank) {
3637     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3638     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3639     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3640   }
3641 
3642   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3643 
3644   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3645   M    = header[1]; N = header[2];
3646 
3647   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3648   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3649   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3650 
3651   /* If global sizes are set, check if they are consistent with that given in the file */
3652   if (sizesset) {
3653     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3654   }
3655   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3656   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3657 
3658   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3659 
3660   /*
3661      This code adds extra rows to make sure the number of rows is
3662      divisible by the blocksize
3663   */
3664   Mbs        = M/bs;
3665   extra_rows = bs - M + bs*Mbs;
3666   if (extra_rows == bs) extra_rows = 0;
3667   else                  Mbs++;
3668   if (extra_rows && !rank) {
3669     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3670   }
3671 
3672   /* determine ownership of all rows */
3673   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3674     mbs = Mbs/size + ((Mbs % size) > rank);
3675     m   = mbs*bs;
3676   } else { /* User set */
3677     m   = newmat->rmap->n;
3678     mbs = m/bs;
3679   }
3680   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3681   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3682 
3683   /* process 0 needs enough room for process with most rows */
3684   if (!rank) {
3685     mmax = rowners[1];
3686     for (i=2; i<=size; i++) {
3687       mmax = PetscMax(mmax,rowners[i]);
3688     }
3689     mmax*=bs;
3690   } else mmax = m;
3691 
3692   rowners[0] = 0;
3693   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3694   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3695   rstart = rowners[rank];
3696   rend   = rowners[rank+1];
3697 
3698   /* distribute row lengths to all processors */
3699   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3700   if (!rank) {
3701     mend = m;
3702     if (size == 1) mend = mend - extra_rows;
3703     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3704     for (j=mend; j<m; j++) locrowlens[j] = 1;
3705     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3706     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3707     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3708     for (j=0; j<m; j++) {
3709       procsnz[0] += locrowlens[j];
3710     }
3711     for (i=1; i<size; i++) {
3712       mend = browners[i+1] - browners[i];
3713       if (i == size-1) mend = mend - extra_rows;
3714       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3715       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3716       /* calculate the number of nonzeros on each processor */
3717       for (j=0; j<browners[i+1]-browners[i]; j++) {
3718         procsnz[i] += rowlengths[j];
3719       }
3720       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3721     }
3722     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3723   } else {
3724     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3725   }
3726 
3727   if (!rank) {
3728     /* determine max buffer needed and allocate it */
3729     maxnz = procsnz[0];
3730     for (i=1; i<size; i++) {
3731       maxnz = PetscMax(maxnz,procsnz[i]);
3732     }
3733     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3734 
3735     /* read in my part of the matrix column indices  */
3736     nz     = procsnz[0];
3737     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3738     mycols = ibuf;
3739     if (size == 1) nz -= extra_rows;
3740     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3741     if (size == 1) {
3742       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3743     }
3744 
3745     /* read in the rows for each of the other processes (except the last) and ship them off */
3746     for (i=1; i<size-1; i++) {
3747       nz   = procsnz[i];
3748       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3749       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3750     }
3751     /* read in the stuff for the last proc */
3752     if (size != 1) {
3753       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3754       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3755       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3756       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3757     }
3758     ierr = PetscFree(cols);CHKERRQ(ierr);
3759   } else {
3760     /* determine buffer space needed for message */
3761     nz = 0;
3762     for (i=0; i<m; i++) {
3763       nz += locrowlens[i];
3764     }
3765     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3766     mycols = ibuf;
3767     /* receive message of column indices */
3768     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3769     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3770     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3771   }
3772 
3773   /* loop over local rows, determining number of off diagonal entries */
3774   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3775   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3776   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3777   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3778   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3779   rowcount = 0; nzcount = 0;
3780   for (i=0; i<mbs; i++) {
3781     dcount  = 0;
3782     odcount = 0;
3783     for (j=0; j<bs; j++) {
3784       kmax = locrowlens[rowcount];
3785       for (k=0; k<kmax; k++) {
3786         tmp = mycols[nzcount++]/bs;
3787         if (!mask[tmp]) {
3788           mask[tmp] = 1;
3789           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3790           else masked1[dcount++] = tmp;
3791         }
3792       }
3793       rowcount++;
3794     }
3795 
3796     dlens[i]  = dcount;
3797     odlens[i] = odcount;
3798 
3799     /* zero out the mask elements we set */
3800     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3801     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3802   }
3803 
3804 
3805   if (!sizesset) {
3806     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3807   }
3808   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3809 
3810   if (!rank) {
3811     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3812     /* read in my part of the matrix numerical values  */
3813     nz     = procsnz[0];
3814     vals   = buf;
3815     mycols = ibuf;
3816     if (size == 1) nz -= extra_rows;
3817     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3818     if (size == 1) {
3819       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3820     }
3821 
3822     /* insert into matrix */
3823     jj = rstart*bs;
3824     for (i=0; i<m; i++) {
3825       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3826       mycols += locrowlens[i];
3827       vals   += locrowlens[i];
3828       jj++;
3829     }
3830     /* read in other processors (except the last one) and ship out */
3831     for (i=1; i<size-1; i++) {
3832       nz   = procsnz[i];
3833       vals = buf;
3834       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3835       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3836     }
3837     /* the last proc */
3838     if (size != 1) {
3839       nz   = procsnz[i] - extra_rows;
3840       vals = buf;
3841       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3842       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3843       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3844     }
3845     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3846   } else {
3847     /* receive numeric values */
3848     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3849 
3850     /* receive message of values*/
3851     vals   = buf;
3852     mycols = ibuf;
3853     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3854 
3855     /* insert into matrix */
3856     jj = rstart*bs;
3857     for (i=0; i<m; i++) {
3858       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3859       mycols += locrowlens[i];
3860       vals   += locrowlens[i];
3861       jj++;
3862     }
3863   }
3864   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3865   ierr = PetscFree(buf);CHKERRQ(ierr);
3866   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3867   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3868   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3869   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3870   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 #undef __FUNCT__
3876 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3877 /*@
3878    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3879 
3880    Input Parameters:
3881 .  mat  - the matrix
3882 .  fact - factor
3883 
3884    Not Collective, each process can use a different factor
3885 
3886    Level: advanced
3887 
3888   Notes:
3889    This can also be set by the command line option: -mat_use_hash_table <fact>
3890 
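   Example usage (a sketch; 1.39 is the default factor used when the hash table is enabled):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve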
3891 .keywords: matrix, hashtable, factor, HT
3892 
3893 .seealso: MatSetOption()
3894 @*/
3895 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3896 {
3897   PetscErrorCode ierr;
3898 
3899   PetscFunctionBegin;
3900   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 EXTERN_C_BEGIN
3905 #undef __FUNCT__
3906 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3907 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3908 {
3909   Mat_MPIBAIJ *baij;
3910 
3911   PetscFunctionBegin;
3912   baij          = (Mat_MPIBAIJ*)mat->data;
3913   baij->ht_fact = fact;
3914   PetscFunctionReturn(0);
3915 }
3916 EXTERN_C_END
3917 
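/*
   MatMPIBAIJGetSeqBAIJ - gives access to the two sequential BAIJ pieces of an MPIBAIJ matrix: Ad is the
   diagonal block, Ao the off-diagonal block, and colmap maps the compressed (local) block columns of Ao
   to global block column numbers.
*/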
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3920 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3921 {
3922   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3923 
3924   PetscFunctionBegin;
3925   *Ad     = a->A;
3926   *Ao     = a->B;
3927   *colmap = a->garray;
3928   PetscFunctionReturn(0);
3929 }
3930 
3931 /*
3932     Special version for direct calls from Fortran (to eliminate two function call overheads
3933 */
3934 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3935 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3936 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3937 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3938 #endif
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3942 /*@C
3943   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3944 
3945   Collective on Mat
3946 
3947   Input Parameters:
3948 + mat - the matrix
3949 . min - number of input rows
3950 . im - input rows
3951 . nin - number of input columns
3952 . in - input columns
3953 . v - numerical values input
3954 - addvin - INSERT_VALUES or ADD_VALUES
3955 
3956   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3957 
3958   Level: advanced
3959 
3960 .seealso:   MatSetValuesBlocked()
3961 @*/
3962 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3963 {
3964   /* convert input arguments to C version */
3965   Mat        mat  = *matin;
3966   PetscInt   m    = *min, n = *nin;
3967   InsertMode addv = *addvin;
3968 
3969   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3970   const MatScalar *value;
3971   MatScalar       *barray     = baij->barray;
3972   PetscBool       roworiented = baij->roworiented;
3973   PetscErrorCode  ierr;
3974   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3975   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3976   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3977 
3978   PetscFunctionBegin;
3979   /* tasks normally handled by MatSetValuesBlocked() */
3980   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3981 #if defined(PETSC_USE_DEBUG)
3982   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3983   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3984 #endif
3985   if (mat->assembled) {
3986     mat->was_assembled = PETSC_TRUE;
3987     mat->assembled     = PETSC_FALSE;
3988   }
3989   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3990 
3991 
3992   if (!barray) {
3993     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3994     baij->barray = barray;
3995   }
3996 
3997   if (roworiented) stepval = (n-1)*bs;
3998   else stepval = (m-1)*bs;
3999 
4000   for (i=0; i<m; i++) {
4001     if (im[i] < 0) continue;
4002 #if defined(PETSC_USE_DEBUG)
4003     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4004 #endif
4005     if (im[i] >= rstart && im[i] < rend) {
4006       row = im[i] - rstart;
4007       for (j=0; j<n; j++) {
4008         /* If there is only one block column (row oriented) or one block row (column oriented), the input already holds a contiguous bs*bs block, so no copy is required */
4009         if ((roworiented) && (n == 1)) {
4010           barray = (MatScalar*)v + i*bs2;
4011         } else if ((!roworiented) && (m == 1)) {
4012           barray = (MatScalar*)v + j*bs2;
4013         } else { /* Here a copy is required */
4014           if (roworiented) {
4015             value = v + i*(stepval+bs)*bs + j*bs;
4016           } else {
4017             value = v + j*(stepval+bs)*bs + i*bs;
4018           }
4019           for (ii=0; ii<bs; ii++,value+=stepval) {
4020             for (jj=0; jj<bs; jj++) {
4021               *barray++ = *value++;
4022             }
4023           }
4024           barray -=bs2;
4025         }
4026 
4027         if (in[j] >= cstart && in[j] < cend) {
4028           col  = in[j] - cstart;
4029           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4030         } else if (in[j] < 0) continue;
4031 #if defined(PETSC_USE_DEBUG)
4032         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4033 #endif
4034         else {
4035           if (mat->was_assembled) {
4036             if (!baij->colmap) {
4037               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4038             }
4039 
4040 #if defined(PETSC_USE_DEBUG)
4041 #if defined(PETSC_USE_CTABLE)
4042             { PetscInt data;
4043               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4044               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4045             }
4046 #else
4047             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4048 #endif
4049 #endif
4050 #if defined(PETSC_USE_CTABLE)
4051             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4052             col  = (col - 1)/bs;
4053 #else
4054             col = (baij->colmap[in[j]] - 1)/bs;
4055 #endif
4056             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
4057               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4058               col  =  in[j];
4059             }
4060           } else col = in[j];
4061           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4062         }
4063       }
4064     } else {
4065       if (!baij->donotstash) {
4066         if (roworiented) {
4067           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4068         } else {
4069           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070         }
4071       }
4072     }
4073   }
4074 
4075   /* task normally handled by MatSetValuesBlocked() */
4076   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
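
/*
   For reference, the usual C-side path sets the same block values through MatSetValuesBlocked().
   A minimal sketch for one bs = 2 block (not part of the library; mat is assumed to be a MATMPIBAIJ
   matrix created with block size 2, and row/col are block indices; all names are illustrative):

     PetscInt    row = 0,col = 1;
     PetscScalar vals[4] = {1.0,2.0,
                            3.0,4.0};    row oriented: vals[ii*bs+jj] is entry (ii,jj) of the block
     ierr = MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/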
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4082 /*@
4083      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4084          in standard CSR format.
4085 
4086    Collective on MPI_Comm
4087 
4088    Input Parameters:
4089 +  comm - MPI communicator
4090 .  bs - the block size, only a block size of 1 is supported
4091 .  m - number of local rows (Cannot be PETSC_DECIDE)
4092 .  n - This value should be the same as the local size used in creating the
4093        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4094        calculated if N is given) For square matrices n is almost always m.
4095 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4096 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4097 .   i - row indices
4098 .   j - column indices
4099 -   a - matrix values
4100 
4101    Output Parameter:
4102 .   mat - the matrix
4103 
4104    Level: intermediate
4105 
4106    Notes:
4107        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4108      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4109      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4110 
4111        The i and j indices are 0 based; i holds the row offsets into the local j and a arrays, so row r uses entries i[r] through i[r+1]-1.
4112 
4113 .keywords: matrix, baij, compressed row, sparse, parallel
4114 
4115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4116           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4117 @*/
4118 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4119 {
4120   PetscErrorCode ierr;
4121 
4122   PetscFunctionBegin;
4123   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4124   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4125   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4126   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4127   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4128   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4129   PetscFunctionReturn(0);
4130 }
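
/*
   Example usage of MatCreateMPIBAIJWithArrays() (a minimal sketch, not part of the library;
   for simplicity it assumes bs = 1 and a single MPI process, and all names are illustrative):

     Mat         A;
     PetscInt    i[] = {0,2,3};              row offsets: row 0 has 2 entries, row 1 has 1
     PetscInt    j[] = {0,1,1};              global column indices of the entries
     PetscScalar a[] = {2.0,-1.0,3.0};
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/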
4131