xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision eb4227cfdd169cc03749ba3ef12d3932f2f322bf)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
/*
   Computes, for each locally owned row, the entry of maximum absolute value
   (returned in v) and optionally its global column index (returned in idx).
   The diagonal block a->A and off-diagonal block a->B are searched
   separately and the two candidates merged per row.
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;   /* indices from the off-diagonal block; stays NULL if idx is NULL */
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* row maxima of the diagonal block; idx receives LOCAL column indices */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift diagonal-block indices to global numbering; rows whose diagonal
       part is entirely zero are skipped (their index is meaningless) */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* row maxima of the off-diagonal block, computed into a scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* per row, keep whichever candidate has larger magnitude */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      /* translate B's local column index to global: garray maps local BLOCK
         columns to global block columns; add the within-block offset */
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);   /* PetscFree(NULL) is a no-op */
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
54 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
61   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
84 #undef __FUNCT__
85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
87 {
88   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
89   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
90   PetscErrorCode ierr;
91   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
92 
93   PetscFunctionBegin;
94 #if defined(PETSC_USE_CTABLE)
95   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
96   for (i=0; i<nbs; i++) {
97     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
98   }
99 #else
100   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
101   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
104 #endif
105   PetscFunctionReturn(0);
106 }
107 
/*
   Inserts or adds one scalar at point location (row,col) into the DIAGONAL
   block A of the MPIBAIJ matrix.  Relies on the locals declared in
   MatSetValues_MPIBAIJ() being in scope (rp,ap,bap,nrow,low,high,t,_i,ii,
   N,brow,bcol,ridx,cidx,rmax and the cached arrays ai,aj,aa,aimax,ailen).
   A short binary search narrows the range, then a linear scan finds the
   block column.  Jumps to a_noinsert when the entry exists or insertion of
   new nonzeros is suppressed (a->nonew == 1); errors when a->nonew == -1
   and the entry is new; otherwise reallocates (if needed) and shifts later
   entries to make room.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
146 
/*
   Same as MatSetValues_SeqBAIJ_A_Private but for the OFF-DIAGONAL block B:
   inserts or adds one scalar at point location (row,col), where col is a
   local column index of B.  Relies on the locals declared in
   MatSetValues_MPIBAIJ() (rp,ap,bap,nrow,low,high,t,_i,ii,N,brow,bcol,
   ridx,cidx,rmax and the cached arrays bi,bj,ba,bimax,bilen).  Honors
   b->nonew exactly as the A variant honors a->nonew.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
185 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues for MPIBAIJ with POINT (not block) indices.  Locally owned
   entries go straight into the diagonal (A) or off-diagonal (B) sequential
   block via the *_Private macros above; rows owned by other processes are
   stashed for communication during assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;    /* ownership range in point units */
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  /* scratch locals used (and mutated) by the *_Private macros */
  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {   /* diagonal block */
          col = in[j] - cstart_orig;
          /* roworiented selects C (row-major) vs Fortran (column-major) layout of v */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;   /* negative column indices ignored */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {   /* off-diagonal block */
          if (mat->was_assembled) {
            /* after a previous assembly B uses compacted local columns;
               translate the global column through colmap (1-based entries) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* column not present: fall back to global numbering by
                 disassembling B, which invalidates the cached pointers */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;   /* colmap is per block; add within-block offset */
          } else col = in[j];   /* before first assembly B uses global columns */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {   /* row owned by another process: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
276 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked for MPIBAIJ: im[]/in[] are BLOCK indices and v holds
   m*n dense bs-by-bs blocks.  Each block is staged (if necessary) into the
   contiguous column-major scratch buffer baij->barray before being handed
   to the sequential block matrices; off-process block rows are stashed.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;   /* ownership range in BLOCK units */
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs*bs staging buffer, kept for the matrix lifetime */
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block rows ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {   /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the bs x bs block into barray, one row at a time */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;   /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {   /* diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;   /* negative block columns ignored */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {   /* off-diagonal block */
          if (mat->was_assembled) {
            /* B's columns are compacted after assembly; translate the global
               block column through colmap (entries are 1-based point columns) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;   /* convert point column back to block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to global numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];   /* before first assembly B uses global block columns */
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {   /* block row owned elsewhere: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
375 
#define HASH_KEY 0.6180339887
/* Multiplicative hashing (the constant is the golden-ratio conjugate):
   scale the key, take the fractional part, and map it to a slot in
   [0,size).  tmp must be a PetscReal lvalue used as scratch. */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues (point indices): the table built by
   MatCreateHashTable_MPIBAIJ_Private() maps (block row, block column) keys
   directly to the address of the stored bs*bs block, so no new nonzeros
   can be created here -- a missing entry is an error.  Off-process rows
   are stashed as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;   /* ownership range in point units */
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;   /* probe statistics */
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {   /* locally owned row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table: key is the global block location + 1 */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        /* linear probing: scan forward from h1, wrapping to the start */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {   /* off-process row: stash */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457 {
458   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
459   PetscBool         roworiented = baij->roworiented;
460   PetscErrorCode    ierr;
461   PetscInt          i,j,ii,jj,row,col;
462   PetscInt          rstart=baij->rstartbs;
463   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465   PetscReal         tmp;
466   MatScalar         **HD = baij->hd,*baij_a;
467   const PetscScalar *v_t,*value;
468 #if defined(PETSC_USE_DEBUG)
469   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470 #endif
471 
472   PetscFunctionBegin;
473   if (roworiented) stepval = (n-1)*bs;
474   else stepval = (m-1)*bs;
475 
476   for (i=0; i<m; i++) {
477 #if defined(PETSC_USE_DEBUG)
478     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480 #endif
481     row = im[i];
482     v_t = v + i*nbs2;
483     if (row >= rstart && row < rend) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486 
487         /* Look up into the Hash Table */
488         key = row*Nbs+col+1;
489         h1  = HASH(size,key,tmp);
490 
491         idx = h1;
492 #if defined(PETSC_USE_DEBUG)
493         total_ct++;
494         insert_ct++;
495         if (HT[idx] != key) {
496           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497           if (idx == size) {
498             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500           }
501         }
502 #else
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #endif
511         baij_a = HD[idx];
512         if (roworiented) {
513           /*value = v + i*(stepval+bs)*bs + j*bs;*/
514           /* value = v + (i*(stepval+bs)+j)*bs; */
515           value = v_t;
516           v_t  += bs;
517           if (addv == ADD_VALUES) {
518             for (ii=0; ii<bs; ii++,value+=stepval) {
519               for (jj=ii; jj<bs2; jj+=bs) {
520                 baij_a[jj] += *value++;
521               }
522             }
523           } else {
524             for (ii=0; ii<bs; ii++,value+=stepval) {
525               for (jj=ii; jj<bs2; jj+=bs) {
526                 baij_a[jj] = *value++;
527               }
528             }
529           }
530         } else {
531           value = v + j*(stepval+bs)*bs + i*bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534               for (jj=0; jj<bs; jj++) {
535                 baij_a[jj] += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540               for (jj=0; jj<bs; jj++) {
541                 baij_a[jj] = *value++;
542               }
543             }
544           }
545         }
546       }
547     } else {
548       if (!baij->donotstash) {
549         if (roworiented) {
550           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557 #if defined(PETSC_USE_DEBUG)
558   baij->ht_total_ct  = total_ct;
559   baij->ht_insert_ct = insert_ct;
560 #endif
561   PetscFunctionReturn(0);
562 }
563 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   Retrieves values at the given point indices.  Only locally owned rows are
   supported (an error is raised otherwise).  Columns in the diagonal range
   are read from baij->A; all other columns are translated through colmap
   and read from baij->B, with 0.0 returned for absent entries.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {   /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {   /* diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {   /* off-diagonal block: translate global column via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;   /* colmap entries are stored shifted by one */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block column (or stale colmap entry): the value is zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;   /* local point column in B */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
606 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   Computes the Frobenius norm, 1-norm (max column sum) or infinity norm
   (max row sum) of an MPIBAIJ matrix by accumulating local contributions
   from the A and B blocks and reducing across the communicator.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over all stored scalars of both blocks, then reduce */
      v  = amat->a;
      nz = amat->nz*bs2;   /* nz counts blocks; bs2 scalars per block */
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs, not cstartbs -- these
         coincide for the usual square row/column layout; verify if
         rectangular layouts must be supported here */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* accumulate |a_ij| into the global column sums; blocks are stored
         column-major, so the inner loop walks one block column at a time */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      /* off-diagonal block: garray maps local block columns to global */
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* for each local block row, accumulate the bs point-row sums over
         both blocks, then keep the largest */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
700 
/*
  Creates and fills the hash table mapping each (block row, block column)
  key to the address of its stored block.
  This table is created only once.
  If new entries need to be added to the matrix,
  the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
/*
   Builds the hash table used by the *_HT MatSetValues variants: for every
   stored block of A and B, key = (global block row)*Nbs + (global block
   column) + 1 is hashed (HASH macro + linear probing with wraparound) and
   the slot records the key (baij->ht) and the address of the block's data
   (baij->hd).  Table size is factor * (number of stored blocks).  A
   nonzero key means the slot is occupied; keys are >= 1 by construction.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;   /* collision count and longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);   /* table already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;          /* global block row */
      col = aj[j]+cstart;      /* global block column (diagonal block) */

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing from h1 until a free slot is found */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];     /* global block column via garray */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
796 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Begins assembly of a parallel BAIJ matrix by
   starting communication of all stashed entries (values set by this process
   but owned by other processes) to their owners.

   Collective; MatAssemblyEnd_MPIBAIJ() receives and inserts the values.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* off-process entries were never stashed, so there is nothing to communicate */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start sending both the point stash (scalar entries) and the block stash (bs x bs blocks) */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
822 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly started by
   MatAssemblyBegin_MPIBAIJ(): receives the stashed off-process entries and
   inserts them, assembles the local diagonal (A) and off-diagonal (B)
   blocks, handles disassembly coordination across processes, and sets up
   the hash-table MatSetValues variants when requested.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each received message is a list of (row,col,val)
       triples sorted so that entries of the same row are consecutive */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed value here is a full bs2-sized block, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  /* assemble the local diagonal block */
  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter context (lvec/Mvctx) used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* when requested, build the hash table and switch to the hash-table-based
     MatSetValues implementations for subsequent insertions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free the MatGetRow work space; it is reallocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
934 
935 #include <petscdraw.h>
936 #undef __FUNCT__
937 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
938 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
939 {
940   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
941   PetscErrorCode    ierr;
942   PetscMPIInt       size = baij->size,rank = baij->rank;
943   PetscInt          bs   = mat->rmap->bs;
944   PetscBool         iascii,isdraw;
945   PetscViewer       sviewer;
946   PetscViewerFormat format;
947 
948   PetscFunctionBegin;
949   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
950   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
951   if (iascii) {
952     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
953     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
954       MatInfo info;
955       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
956       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
957       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
958       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
959                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
960       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
966       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
967       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     } else if (format == PETSC_VIEWER_ASCII_INFO) {
970       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
971       PetscFunctionReturn(0);
972     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
973       PetscFunctionReturn(0);
974     }
975   }
976 
977   if (isdraw) {
978     PetscDraw draw;
979     PetscBool isnull;
980     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
981     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
982   }
983 
984   if (size == 1) {
985     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
986     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
987   } else {
988     /* assemble the entire matrix onto first processor. */
989     Mat         A;
990     Mat_SeqBAIJ *Aloc;
991     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
992     MatScalar   *a;
993 
994     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
995     /* Perhaps this should be the type of mat? */
996     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
997     if (!rank) {
998       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
999     } else {
1000       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1001     }
1002     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1003     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1004     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1005     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1006 
1007     /* copy over the A part */
1008     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1009     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1010     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1011 
1012     for (i=0; i<mbs; i++) {
1013       rvals[0] = bs*(baij->rstartbs + i);
1014       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1015       for (j=ai[i]; j<ai[i+1]; j++) {
1016         col = (baij->cstartbs+aj[j])*bs;
1017         for (k=0; k<bs; k++) {
1018           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1019           col++; a += bs;
1020         }
1021       }
1022     }
1023     /* copy over the B part */
1024     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1025     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = baij->garray[aj[j]]*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     ierr = PetscFree(rvals);CHKERRQ(ierr);
1038     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040     /*
1041        Everyone has to call to draw the matrix since the graphics waits are
1042        synchronized across all processors that share the PetscDraw object
1043     */
1044     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1045     if (!rank) {
1046       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1047       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1048       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1049       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1050     }
1051     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1052     ierr = MatDestroy(&A);CHKERRQ(ierr);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1059 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1060 {
1061   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1062   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1063   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1064   PetscErrorCode ierr;
1065   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1066   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1067   int            fd;
1068   PetscScalar    *column_values;
1069   FILE           *file;
1070   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1071   PetscInt       message_count,flowcontrolcount;
1072 
1073   PetscFunctionBegin;
1074   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1075   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1076   nz   = bs2*(A->nz + B->nz);
1077   rlen = mat->rmap->n;
1078   if (!rank) {
1079     header[0] = MAT_FILE_CLASSID;
1080     header[1] = mat->rmap->N;
1081     header[2] = mat->cmap->N;
1082 
1083     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1084     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1085     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1086     /* get largest number of rows any processor has */
1087     range = mat->rmap->range;
1088     for (i=1; i<size; i++) {
1089       rlen = PetscMax(rlen,range[i+1] - range[i]);
1090     }
1091   } else {
1092     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1093   }
1094 
1095   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1096   /* compute lengths of each row  */
1097   for (i=0; i<a->mbs; i++) {
1098     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1099   }
1100   /* store the row lengths to the file */
1101   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1102   if (!rank) {
1103     MPI_Status status;
1104     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1105     rlen = (range[1] - range[0])/bs;
1106     for (i=0; i<rlen; i++) {
1107       for (j=0; j<bs; j++) {
1108         row_lens[i*bs+j] = bs*crow_lens[i];
1109       }
1110     }
1111     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1112     for (i=1; i<size; i++) {
1113       rlen = (range[i+1] - range[i])/bs;
1114       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1115       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1116       for (k=0; k<rlen; k++) {
1117         for (j=0; j<bs; j++) {
1118           row_lens[k*bs+j] = bs*crow_lens[k];
1119         }
1120       }
1121       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1122     }
1123     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1124     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1125   } else {
1126     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1127     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1128     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1129   }
1130   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1131 
1132   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1133      information needed to make it for each row from a block row. This does require more communication but still not more than
1134      the communication needed for the nonzero values  */
1135   nzmax = nz; /*  space a largest processor needs */
1136   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1137   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1138   cnt   = 0;
1139   for (i=0; i<a->mbs; i++) {
1140     pcnt = cnt;
1141     for (j=B->i[i]; j<B->i[i+1]; j++) {
1142       if ((col = garray[B->j[j]]) > cstart) break;
1143       for (l=0; l<bs; l++) {
1144         column_indices[cnt++] = bs*col+l;
1145       }
1146     }
1147     for (k=A->i[i]; k<A->i[i+1]; k++) {
1148       for (l=0; l<bs; l++) {
1149         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1150       }
1151     }
1152     for (; j<B->i[i+1]; j++) {
1153       for (l=0; l<bs; l++) {
1154         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1155       }
1156     }
1157     len = cnt - pcnt;
1158     for (k=1; k<bs; k++) {
1159       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1160       cnt += len;
1161     }
1162   }
1163   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1164 
1165   /* store the columns to the file */
1166   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1167   if (!rank) {
1168     MPI_Status status;
1169     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     for (i=1; i<size; i++) {
1171       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1172       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1173       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1175     }
1176     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1177   } else {
1178     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1179     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1184 
1185   /* load up the numerical values */
1186   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1187   cnt  = 0;
1188   for (i=0; i<a->mbs; i++) {
1189     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1190     for (j=B->i[i]; j<B->i[i+1]; j++) {
1191       if (garray[B->j[j]] > cstart) break;
1192       for (l=0; l<bs; l++) {
1193         for (ll=0; ll<bs; ll++) {
1194           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1195         }
1196       }
1197       cnt += bs;
1198     }
1199     for (k=A->i[i]; k<A->i[i+1]; k++) {
1200       for (l=0; l<bs; l++) {
1201         for (ll=0; ll<bs; ll++) {
1202           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1203         }
1204       }
1205       cnt += bs;
1206     }
1207     for (; j<B->i[i+1]; j++) {
1208       for (l=0; l<bs; l++) {
1209         for (ll=0; ll<bs; ll++) {
1210           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1211         }
1212       }
1213       cnt += bs;
1214     }
1215     cnt += (bs-1)*rlen;
1216   }
1217   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1218 
1219   /* store the column values to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     MPI_Status status;
1223     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1224     for (i=1; i<size; i++) {
1225       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1226       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1228       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1229     }
1230     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1231   } else {
1232     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1233     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1236   }
1237   ierr = PetscFree(column_values);CHKERRQ(ierr);
1238 
1239   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1240   if (file) {
1241     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatView_MPIBAIJ"
1248 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1249 {
1250   PetscErrorCode ierr;
1251   PetscBool      iascii,isdraw,issocket,isbinary;
1252 
1253   PetscFunctionBegin;
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1256   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1257   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1258   if (iascii || isdraw || issocket) {
1259     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1260   } else if (isbinary) {
1261     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by a parallel BAIJ matrix:
   the stashes, the sequential diagonal (A) and off-diagonal (B) blocks, the
   column map, the ghost vector and scatter context, the work arrays and hash
   table, and finally the composed query functions registered at creation.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  /* hd/ht are the hash-table arrays allocated by MatCreateHashTable_MPIBAIJ_Private() */
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and unregister the composed functions */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatMult_MPIBAIJ"
1310 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1311 {
1312   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314   PetscInt       nt;
1315 
1316   PetscFunctionBegin;
1317   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1318   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1319   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1320   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1321   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1323   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
/*
   MatMultAdd_MPIBAIJ - Computes zz = yy + A*xx.

   The scatter of the ghost values of xx is started first so the local
   (diagonal block) multiply-add overlaps the communication; the
   off-diagonal block is then applied to the gathered ghost values.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1342 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - Computes yy = A^T * xx.

   The transpose of the off-diagonal block is applied first and its result
   scattered in reverse (adding) to the owning processes.  When the scatter's
   begin and end are not merged, the local transpose multiply is interleaved
   between them to overlap computation with communication.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1373 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = yy + A^T * xx, overlapping the
   reverse scatter of the off-diagonal block's contribution with the local
   transpose multiply-add.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1394 
1395 /*
1396   This only works correctly for square matrices where the subblock A->A is the
1397    diagonal block
1398 */
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1401 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1402 {
1403   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1404   PetscErrorCode ierr;
1405 
1406   PetscFunctionBegin;
1407   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1408   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatScale_MPIBAIJ"
1414 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1421   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned global row of the matrix by
   merging the corresponding rows of the local diagonal (A) and off-diagonal
   (B) blocks.  Column indices are returned in global point (non-block)
   numbering, in ascending order (assuming A's and B's rows are each already
   sorted).  The returned arrays are matrix-owned work space, released by
   MatRestoreRow_MPIBAIJ().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* max combined block count of any block row of A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  /* only request from the blocks the arrays the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global block column precedes
         the diagonal block; the merged layout is B[0..imark) | A | B[imark..) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            /* translate B's local point column to global: block part via cmap, offset within block kept */
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark was not computed above; find it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1506 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1507 {
1508   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1509 
1510   PetscFunctionBegin;
1511   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1512   baij->getrowactive = PETSC_FALSE;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1518 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1519 {
1520   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1525   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1531 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1532 {
1533   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1534   Mat            A  = a->A,B = a->B;
1535   PetscErrorCode ierr;
1536   PetscReal      isend[5],irecv[5];
1537 
1538   PetscFunctionBegin;
1539   info->block_size = (PetscReal)matin->rmap->bs;
1540 
1541   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1542 
1543   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1544   isend[3] = info->memory;  isend[4] = info->mallocs;
1545 
1546   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1547 
1548   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1549   isend[3] += info->memory;  isend[4] += info->mallocs;
1550 
1551   if (flag == MAT_LOCAL) {
1552     info->nz_used      = isend[0];
1553     info->nz_allocated = isend[1];
1554     info->nz_unneeded  = isend[2];
1555     info->memory       = isend[3];
1556     info->mallocs      = isend[4];
1557   } else if (flag == MAT_GLOBAL_MAX) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1559 
1560     info->nz_used      = irecv[0];
1561     info->nz_allocated = irecv[1];
1562     info->nz_unneeded  = irecv[2];
1563     info->memory       = irecv[3];
1564     info->mallocs      = irecv[4];
1565   } else if (flag == MAT_GLOBAL_SUM) {
1566     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1567 
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1574   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1575   info->fill_ratio_needed = 0;
1576   info->factor_mallocs    = 0;
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1582 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1583 {
1584   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1585   PetscErrorCode ierr;
1586 
1587   PetscFunctionBegin;
1588   switch (op) {
1589   case MAT_NEW_NONZERO_LOCATIONS:
1590   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1591   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1592   case MAT_KEEP_NONZERO_PATTERN:
1593   case MAT_NEW_NONZERO_LOCATION_ERR:
1594     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1595     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1596     break;
1597   case MAT_ROW_ORIENTED:
1598     a->roworiented = flg;
1599 
1600     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1601     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1602     break;
1603   case MAT_NEW_DIAGONALS:
1604     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1605     break;
1606   case MAT_IGNORE_OFF_PROC_ENTRIES:
1607     a->donotstash = flg;
1608     break;
1609   case MAT_USE_HASH_TABLE:
1610     a->ht_flag = flg;
1611     break;
1612   case MAT_SYMMETRIC:
1613   case MAT_STRUCTURALLY_SYMMETRIC:
1614   case MAT_HERMITIAN:
1615   case MAT_SYMMETRY_ETERNAL:
1616     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1617     break;
1618   default:
1619     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
/*
   MatTranspose_MPIBAIJ - Forms the transpose of a parallel BAIJ matrix.

   Each stored bs x bs block is walked column by column; every column of a
   block in row i, column c of A is inserted as one row (global index derived
   from c) with bs column indices (the rows spanned by block row i) in B, so
   the row/column swap in MatSetValues_MPIBAIJ() realizes the transpose.
   For MAT_INITIAL_MATRIX (or in-place) a new matrix is created without
   preallocation counts, so insertion may malloc.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals = the bs consecutive global row indices covered by block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one block column (bs values starting at a) as row 'col' of B */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps local off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
  else {
    /* in-place transpose: move B's contents into A's header and discard B */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1690 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr)
   (either vector may be NULL to skip that side).

   The scatter of rr into the ghosted work vector lvec is started before the
   local scaling so communication overlaps with computation; the off-diagonal
   block is right-scaled only after the scatter has completed.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only local rows, so no communication is needed */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1723 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the globally numbered rows listed in rows[],
   optionally placing 'diag' on the diagonal, and (when x and b are given)
   setting b[row] = diag*x[row] in the zeroed rows.

   Row indices may refer to rows owned by other processes, so the list is
   first routed to the owning processes with a hand-rolled Isend/Irecv
   exchange before the local sequential kernels are invoked.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
  j    = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership search only when the indices stop ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;        /* number of rows destined for process j */
        nprocs[2*j+1] = 1;    /* flag: at least one message goes to j */
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    /* the user promised all listed rows are local: skip the global handshake */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* starts[] was consumed while packing svalues; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];  /* first global row owned by this process */

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local row numbering */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: fall back to the general (malloc-heavy)
       MatSetValues() path for placing the diagonal entries */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1896 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1897 {
1898   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatEqual_MPIBAIJ"
1910 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1911 {
1912   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1913   Mat            a,b,c,d;
1914   PetscBool      flg;
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   a = matA->A; b = matA->B;
1919   c = matB->A; d = matB->B;
1920 
1921   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1922   if (flg) {
1923     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1924   }
1925   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatCopy_MPIBAIJ"
1931 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1932 {
1933   PetscErrorCode ierr;
1934   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1935   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1936 
1937   PetscFunctionBegin;
1938   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1939   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1940     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1941   } else {
1942     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1943     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1950 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1951 {
1952   PetscErrorCode ierr;
1953 
1954   PetscFunctionBegin;
1955   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1961 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1962 {
1963   PetscErrorCode ierr;
1964   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1965   PetscBLASInt   bnz,one=1;
1966   Mat_SeqBAIJ    *x,*y;
1967 
1968   PetscFunctionBegin;
1969   if (str == SAME_NONZERO_PATTERN) {
1970     PetscScalar alpha = a;
1971     x    = (Mat_SeqBAIJ*)xx->A->data;
1972     y    = (Mat_SeqBAIJ*)yy->A->data;
1973     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1974     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1975     x    = (Mat_SeqBAIJ*)xx->B->data;
1976     y    = (Mat_SeqBAIJ*)yy->B->data;
1977     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1978     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1979   } else {
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1987 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1988 {
1989   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1994   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2000 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2001 {
2002   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2007   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2013 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2014 {
2015   PetscErrorCode ierr;
2016   IS             iscol_local;
2017   PetscInt       csize;
2018 
2019   PetscFunctionBegin;
2020   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2021   if (call == MAT_REUSE_MATRIX) {
2022     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2023     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2024   } else {
2025     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2026   }
2027   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2028   if (call == MAT_INITIAL_MATRIX) {
2029     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2030     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
2034 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()

  The sequential per-process piece (Mreuse) is stashed on the result under
  the key "SubMatrix" so a later MAT_REUSE_MATRIX call can find it.
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  PetscBool      idflag,allrows, allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert point index sets into block index sets for the BAIJ kernels */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  /* ... and the analogous special case for the rows */
  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;          /* block column indices for block row i */
    vwork = aa;     aa += nz*bs*bs;    /* each block entry stores bs*bs values */
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2178 
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
/*
   MatPermute_MPIBAIJ - Applies the row permutation rowp and column
   permutation colp to A, producing the permuted matrix in B.

   The global row permutation is gathered onto every process and inverted;
   the locally owned slice of the inverse becomes the row index set for a
   submatrix extraction, which performs the actual data movement.
*/
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,rlocal_size,clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    /* rowp lives on another communicator; re-create it on the matrix comm */
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(&growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2239 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240 {
2241   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2242   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2243 
2244   PetscFunctionBegin;
2245   if (nghosts) *nghosts = B->nbs;
2246   if (ghosts) *ghosts = baij->garray;
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2251 
#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    Builds the data structures used to compute a Jacobian efficiently by
    finite differences with coloring: for each color, the (global or ghosted)
    set of columns of that color, the rows those columns touch, and for each
    such row which column of the color owns its entry.

    NOTE(review): the original header comment said "almost identical to
    MatFDColoringCreate_MPIBAIJ()", i.e. to itself; presumably the AIJ
    variant was meant.
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               ctype=c->ctype;

  PetscFunctionBegin;
  if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
  else     ltog = NULL;
  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);

  /* all sizes below are in block units, hence the division by bs */
  M         = mat->rmap->n/bs;
  cstart    = mat->cmap->rstart/bs;
  cend      = mat->cmap->rend/bs;
  c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
  c->N      = mat->cmap->N/bs;
  c->m      = mat->rmap->n/bs;
  c->rstart = mat->rmap->rstart/bs;

  c->ncolors = nis;
  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
  ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);

  /* Allow access to data structures of local part of matrix */
  if (!baij->colmap) {
    ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
  }
  /* column-oriented (CSC-like) views of the diagonal (A) and off-diagonal (B) blocks */
  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);

  /* rowhit needs one extra slot: entry M is the sentinel/head of the linked list
     used by the slow path below */
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);

  for (i=0; i<nis; i++) {
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);

    /* record the local columns of color i */
    c->ncolumns[i] = n;
    if (n) {
      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL) {
      /* Determine the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);

      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get complete list of columns for color on each processor */
      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED) {
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");

    /*
       Mark all rows affect by these columns
    */
    /* Temporary option to allow for debugging/testing */
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
    if (!flg) { /*-----------------------------------------------------------------------------*/
      /* crude, fast version: rowhit[row] = col+1 marks a hit (0 means no hit) */
      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];      /* map local (ghosted) index to global */
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
          /* column is off-process: translate global column to B's compressed
             (point) index via colmap; colmap stores index+1 so 0 means absent */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;                  /* this process has no entries in that column */
          } else {
            colb = colb/bs;         /* convert point index to block index */
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }
        /* loop over columns marking them in rowhit */
        for (k=0; k<m; k++) {
          rowhit[*rows++] = col + 1;
        }
      }

      /* count the number of hits */
      nrows = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) nrows++;
      }
      c->nrows[i] = nrows;
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      nrows       = 0;
      /* second pass: compact the hit rows and remember the owning column of each */
      for (j=0; j<M; j++) {
        if (rowhit[j]) {
          c->rows[i][nrows]          = j;
          c->columnsforrow[i][nrows] = rowhit[j] - 1;
          nrows++;
        }
      }
    } else { /*-------------------------------------------------------------------------------*/
      /* slow version, using rowhit as a linked list; also detects invalid
         colorings (two columns of the same color hitting one row) */
      PetscInt currentcol,fm,mfm;
      rowhit[M] = M;               /* sentinel: empty list points to itself */
      nrows     = 0;
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }

        /* loop over columns marking them in rowhit */
        fm = M;    /* fm points to first entry in linked list */
        for (k=0; k<m; k++) {
          currentcol = *rows++;
          /* is it already in the list? */
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm                           = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on processor scalings followed by the off processor
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    /* expand block-level garray to point-level global indices for the ghost vector */
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column  is in "off-processor" part */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb  = colb/bs;
          colb += cend - cstart;  /* ghost entries follow the owned block in vscale */
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping: inverse of ltog, -1 for globals not present locally */
    PetscInt N = mat->cmap->N,nlocal,*gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
    for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];      /* global column index */

        c->vscaleforrow[k][l] = gtol[col]; /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);

  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
  if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
  CHKMEMQ;
  PetscFunctionReturn(0);
}
2542 
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
/*
   Gathers the (block) nonzero structure of the entire parallel matrix onto
   every process as a sequential AIJ matrix with one entry per block; the
   numerical values of *newmat are not meaningful, only its pattern.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of block nonzeros in global block row i (A part + B part) */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  /* single allocation holds both recvcounts and displs */
  ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write this rank's rows directly into B's j array at the right offset,
     merging the lower-B, diagonal-A, and upper-B parts so each row is sorted */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recvcounts[i] = total block nonzeros owned by rank i */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate known symmetry information to the gathered matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatSOR_MPIBAIJ"
2666 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2667 {
2668   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2669   PetscErrorCode ierr;
2670   Vec            bb1 = 0;
2671 
2672   PetscFunctionBegin;
2673   if (flag == SOR_APPLY_UPPER) {
2674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2675     PetscFunctionReturn(0);
2676   }
2677 
2678   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2679     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2680   }
2681 
2682   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2683     if (flag & SOR_ZERO_INITIAL_GUESS) {
2684       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2685       its--;
2686     }
2687 
2688     while (its--) {
2689       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691 
2692       /* update rhs: bb1 = bb - B*x */
2693       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2694       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2695 
2696       /* local sweep */
2697       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2698     }
2699   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2700     if (flag & SOR_ZERO_INITIAL_GUESS) {
2701       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2702       its--;
2703     }
2704     while (its--) {
2705       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707 
2708       /* update rhs: bb1 = bb - B*x */
2709       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2710       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2711 
2712       /* local sweep */
2713       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2714     }
2715   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2716     if (flag & SOR_ZERO_INITIAL_GUESS) {
2717       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2718       its--;
2719     }
2720     while (its--) {
2721       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723 
2724       /* update rhs: bb1 = bb - B*x */
2725       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2726       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2727 
2728       /* local sweep */
2729       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2730     }
2731   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2732 
2733   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2741 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2742 {
2743   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 
2752 /* -------------------------------------------------------------------*/
/*
   Operation (dispatch) table for the MPIBAIJ matrix type.  Entries are
   strictly positional: the numeric markers give the index of the following
   slot in struct _MatOps, and a 0 entry means that operation is not
   implemented for this type.  Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatGetSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIBAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatGetSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*124*/0,
                                       0,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0
};
2895 
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2898 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2899 {
2900   PetscFunctionBegin;
2901   *a = ((Mat_MPIBAIJ*)A->data)->A;
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2906 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
/*
   Preallocates and fills an MPIBAIJ matrix from block CSR data (ii,jj,V),
   given in block units with block-column indices sorted within each row.
   Counts the diagonal-block vs off-diagonal-block entries per row to size
   the two sequential parts, then inserts the values (zeros if V is NULL).
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* re-fetch bs: the layout may have adjusted it during setup */
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all quantities below are in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    /* skip columns left of the diagonal block (relies on sorted jj) */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count columns inside [cstart,cend): the diagonal-block entries */
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;  /* everything else goes in the off-diagonal block */
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* if no values supplied, insert zeros from a scratch buffer large enough
     for the longest row (nz_max blocks of bs*bs scalars) */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    /* with scratch zeros, always point at the buffer start; with user values,
       offset into V by the row's first block */
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2969 
2970 #undef __FUNCT__
2971 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2972 /*@C
2973    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2974    (the default parallel PETSc format).
2975 
2976    Collective on MPI_Comm
2977 
2978    Input Parameters:
2979 +  A - the matrix
2980 .  bs - the block size
2981 .  i - the indices into j for the start of each local row (starts with zero)
2982 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2983 -  v - optional values in the matrix
2984 
2985    Level: developer
2986 
2987 .keywords: matrix, aij, compressed row, sparse, parallel
2988 
.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MPIBAIJ
2990 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);   /* bs must be identical on all ranks */
  /* Dispatch to the type-specific implementation if this matrix type provides
     one; silently does nothing otherwise. */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3005 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
3006 {
3007   Mat_MPIBAIJ    *b;
3008   PetscErrorCode ierr;
3009   PetscInt       i;
3010   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3011 
3012   PetscFunctionBegin;
3013   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3014   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3015 
3016   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3017   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3018   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3019   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3020 
3021   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3022   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3023   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3025   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3026 
3027   if (d_nnz) {
3028     for (i=0; i<B->rmap->n/bs; i++) {
3029       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3030     }
3031   }
3032   if (o_nnz) {
3033     for (i=0; i<B->rmap->n/bs; i++) {
3034       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3035     }
3036   }
3037 
3038   b      = (Mat_MPIBAIJ*)B->data;
3039   b->bs2 = bs*bs;
3040   b->mbs = B->rmap->n/bs;
3041   b->nbs = B->cmap->n/bs;
3042   b->Mbs = B->rmap->N/bs;
3043   b->Nbs = B->cmap->N/bs;
3044 
3045   for (i=0; i<=b->size; i++) {
3046     b->rangebs[i] = B->rmap->range[i]/bs;
3047   }
3048   b->rstartbs = B->rmap->rstart/bs;
3049   b->rendbs   = B->rmap->rend/bs;
3050   b->cstartbs = B->cmap->rstart/bs;
3051   b->cendbs   = B->cmap->rend/bs;
3052 
3053   if (!B->preallocated) {
3054     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3055     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3056     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3057     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3058     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3059     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3060     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3061     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3062     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3063   }
3064 
3065   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3066   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3067   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3068   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3069   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3070   B->preallocated = PETSC_TRUE;
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3075 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
/*
   MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj adjacency matrix from the block
   structure of an MPIBAIJ matrix: one vertex per block row, one edge per
   nonzero block.  Diagonal blocks (self edges) are omitted, as the MPIAdj
   format requires.
*/
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* Pass 1: count entries per block row (diagonal part + off-diagonal part). */
  ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;  /* memory-corruption checks; no-ops unless PETSc debugging is on */
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if this block row contains a diagonal block */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
    CHKMEMQ;
  }
  /* Pass 2: merge each row's columns in ascending global block-column order:
     off-diagonal columns left of this process's range, then the diagonal-part
     columns (skipping the diagonal itself, mapped to global by adding rstart),
     then the remaining off-diagonal columns (mapped through garray). */
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj; no weights are supplied. */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3123 
3124 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3125 
3126 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3127 
3128 #undef __FUNCT__
3129 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to MPIAIJ
   by converting its two sequential pieces and splicing them into a freshly
   created MPIAIJ shell.
*/
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  /* Create an empty MPIAIJ matrix with matching sizes. */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*) B->data;

  /* Replace B's (empty) local pieces with converted copies of A's pieces.
     MatDisAssemble_MPIBAIJ() modifies A, hence the re-assembly of A below. */
  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Restore A to an assembled state after the disassembly above. */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    /* In-place conversion: B's contents replace A's under A's header. */
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
3163 
3164 #if defined(PETSC_HAVE_MUMPS)
3165 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3166 #endif
3167 
3168 /*MC
3169    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3170 
3171    Options Database Keys:
3172 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3173 . -mat_block_size <bs> - set the blocksize used to store the matrix
3174 - -mat_use_hash_table <fact>
3175 
3176   Level: beginner
3177 
3178 .seealso: MatCreateMPIBAIJ
3179 M*/
3180 
3181 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3182 
3183 #undef __FUNCT__
3184 #define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor for the MPIBAIJ matrix type: allocates the
   type-specific data structure, installs the operation table, initializes all
   cached state, and registers the composed (name-looked-up) operations such
   as conversions and preallocation routines.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* Install the full operation table declared earlier in this file. */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* Optional hash-table assembly; the factor scales the table size and must
     exceed 1 (values <= 1 fall back to the 1.39 default). */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* Register type-specific implementations that generic code looks up by name. */
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3266 
3267 /*MC
3268    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3269 
3270    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3271    and MATMPIBAIJ otherwise.
3272 
3273    Options Database Keys:
3274 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3275 
3276   Level: beginner
3277 
3278 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3279 M*/
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3283 /*@C
3284    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3285    (block compressed row).  For good matrix assembly performance
3286    the user should preallocate the matrix storage by setting the parameters
3287    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3288    performance can be increased by more than a factor of 50.
3289 
3290    Collective on Mat
3291 
3292    Input Parameters:
3293 +  A - the matrix
.  bs   - size of block
3295 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3296            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3301 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3302            submatrix (same for all local rows).
3303 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3304            off-diagonal portion of the local submatrix (possibly different for
3305            each block row) or NULL.
3306 
3307    If the *_nnz parameter is given then the *_nz parameter is ignored
3308 
3309    Options Database Keys:
3310 +   -mat_block_size - size of the blocks to use
3311 -   -mat_use_hash_table <fact>
3312 
3313    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3316 
3317    Storage Information:
3318    For a square global matrix we define each processor's diagonal portion
3319    to be its local rows and the corresponding columns (a square submatrix);
3320    each processor's off-diagonal portion encompasses the remainder of the
3321    local matrix (a rectangular submatrix).
3322 
3323    The user can specify preallocated storage for the diagonal part of
3324    the local submatrix with either d_nz or d_nnz (not both).  Set
3325    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3326    memory allocation.  Likewise, specify preallocated storage for the
3327    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3328 
3329    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3330    the figure below we depict these three local rows and all columns (0-11).
3331 
3332 .vb
3333            0 1 2 3 4 5 6 7 8 9 10 11
3334           -------------------
3335    row 3  |  o o o d d d o o o o o o
3336    row 4  |  o o o d d d o o o o o o
3337    row 5  |  o o o d d d o o o o o o
3338           -------------------
3339 .ve
3340 
3341    Thus, any entries in the d locations are stored in the d (diagonal)
3342    submatrix, and any entries in the o locations are stored in the
3343    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3344    stored simply in the MATSEQBAIJ format for compressed row storage.
3345 
3346    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3347    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3348    In general, for PDE problems in which most nonzeros are near the diagonal,
3349    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3350    or you will get TERRIBLE performance; see the users' manual chapter on
3351    matrices.
3352 
3353    You can call MatGetInfo() to get information on how effective the preallocation was;
3354    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3355    You can also run with the option -info and look for messages with the string
3356    malloc in them to see if additional memory allocation was needed.
3357 
3358    Level: intermediate
3359 
3360 .keywords: matrix, block, aij, compressed row, sparse, parallel
3361 
3362 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3363 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);   /* bs must be identical on all ranks */
  /* Dispatch to the type-specific implementation if this matrix type provides
     one; silently does nothing otherwise. */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3375 
3376 #undef __FUNCT__
3377 #define __FUNCT__ "MatCreateBAIJ"
3378 /*@C
3379    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3380    (block compressed row).  For good matrix assembly performance
3381    the user should preallocate the matrix storage by setting the parameters
3382    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3383    performance can be increased by more than a factor of 50.
3384 
3385    Collective on MPI_Comm
3386 
3387    Input Parameters:
3388 +  comm - MPI communicator
.  bs   - size of block
3390 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3391            This value should be the same as the local size used in creating the
3392            y vector for the matrix-vector product y = Ax.
3393 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3394            This value should be the same as the local size used in creating the
3395            x vector for the matrix-vector product y = Ax.
3396 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3397 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3398 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3399            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3404 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3405            submatrix (same for all local rows).
3406 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3407            off-diagonal portion of the local submatrix (possibly different for
3408            each block row) or NULL.
3409 
3410    Output Parameter:
3411 .  A - the matrix
3412 
3413    Options Database Keys:
3414 +   -mat_block_size - size of the blocks to use
3415 -   -mat_use_hash_table <fact>
3416 
3417    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3419    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3420 
3421    Notes:
3422    If the *_nnz parameter is given then the *_nz parameter is ignored
3423 
   A nonzero block is any block that has 1 or more nonzeros in it
3425 
3426    The user MUST specify either the local or global matrix dimensions
3427    (possibly both).
3428 
3429    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3430    than it must be used on all processors that share the object for that argument.
3431 
3432    Storage Information:
3433    For a square global matrix we define each processor's diagonal portion
3434    to be its local rows and the corresponding columns (a square submatrix);
3435    each processor's off-diagonal portion encompasses the remainder of the
3436    local matrix (a rectangular submatrix).
3437 
3438    The user can specify preallocated storage for the diagonal part of
3439    the local submatrix with either d_nz or d_nnz (not both).  Set
3440    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3441    memory allocation.  Likewise, specify preallocated storage for the
3442    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3443 
3444    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3445    the figure below we depict these three local rows and all columns (0-11).
3446 
3447 .vb
3448            0 1 2 3 4 5 6 7 8 9 10 11
3449           -------------------
3450    row 3  |  o o o d d d o o o o o o
3451    row 4  |  o o o d d d o o o o o o
3452    row 5  |  o o o d d d o o o o o o
3453           -------------------
3454 .ve
3455 
3456    Thus, any entries in the d locations are stored in the d (diagonal)
3457    submatrix, and any entries in the o locations are stored in the
3458    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3459    stored simply in the MATSEQBAIJ format for compressed row storage.
3460 
3461    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3462    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3463    In general, for PDE problems in which most nonzeros are near the diagonal,
3464    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3465    or you will get TERRIBLE performance; see the users' manual chapter on
3466    matrices.
3467 
3468    Level: intermediate
3469 
3470 .keywords: matrix, block, aij, compressed row, sparse, parallel
3471 
3472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3473 @*/
3474 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3475 {
3476   PetscErrorCode ierr;
3477   PetscMPIInt    size;
3478 
3479   PetscFunctionBegin;
3480   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3481   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3482   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3483   if (size > 1) {
3484     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3485     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3486   } else {
3487     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3488     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3489   }
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix: copies layout and
   block-size bookkeeping, rebuilds the communication machinery, and
   duplicates the two sequential pieces (values copied or not per cpvalues).
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy block-size bookkeeping (all in block units). */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* Share the row/column layouts by reference rather than copying them. */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* Per-object scratch state starts empty in the duplicate. */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: settings are inherited, counters restart at zero */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* Copy the global-to-local column map if the original has one. */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* Copy the map from local off-diagonal block columns to global columns. */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* Rebuild the block stash and duplicate the matrix-vector scatter state. */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  /* Duplicate the sequential diagonal and off-diagonal pieces. */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3578 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a parallel BAIJ matrix from a PETSc binary viewer.

   Protocol: process 0 reads the whole file sequentially (header, row
   lengths, column indices, then numerical values) and ships each other
   process its slice with matched MPI sends/receives.  If the global row
   count is not divisible by the block size, extra rows/columns are
   appended, each holding a single unit entry on the diagonal, so the
   padded size is a multiple of bs.

   Only square matrices are supported (the file's M must equal N).
   Collective on the viewer's communicator.
*/
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

  PetscFunctionBegin;
  /* Block size defaults to 1; users override it with -matload_block_size */
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* Only rank 0 touches the file; everything else arrives via MPI */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  /* sizesset records whether the caller pre-set any local/global sizes */
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  /* Broadcast M, N, total-nz (header[1..3]); header[0] was checked on rank 0 only */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  /* grows/gcols are only read when sizesset is true, so they are initialized here */
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  /* rowners[]  = block-row ownership ranges; browners[] = same in point rows */
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax*=bs;
  } else mmax = -1;             /* unused, but compiler warns anyway */

  /* convert per-process counts into prefix-sum ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    /* padding rows each carry exactly one (diagonal) entry */
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      /* the very last process owns the padding rows, which are not on disk */
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    /* matches the MPI_Send in the rank-0 loop above */
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      /* padding rows get their diagonal column index, M..M+extra_rows-1 */
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask/masked1/masked2 implement a scatter-mark-reset scheme so each
     block column is counted once per block row: masked1 collects diagonal
     block columns, masked2 off-diagonal ones */
  ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }


  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      /* unit diagonal for the padding rows keeps the matrix nonsingular */
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      /* NOTE(review): relies on i having exited the loop above equal to
         size-1 (also true when size==2, where the loop body never runs),
         so procsnz[i] == procsnz[size-1]; fragile but correct */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3843 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective, each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

   The call is forwarded with PetscTryMethod(), so it is a no-op for
   matrix types that do not implement MatSetHashTableFactor_C.

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3872 
3873 #undef __FUNCT__
3874 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3875 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3876 {
3877   Mat_MPIBAIJ *baij;
3878 
3879   PetscFunctionBegin;
3880   baij          = (Mat_MPIBAIJ*)mat->data;
3881   baij->ht_fact = fact;
3882   PetscFunctionReturn(0);
3883 }
3884 
3885 #undef __FUNCT__
3886 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3887 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3888 {
3889   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3890 
3891   PetscFunctionBegin;
3892   *Ad     = a->A;
3893   *Ao     = a->B;
3894   *colmap = a->garray;
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 /*
3899     Special version for direct calls from Fortran (to eliminate two function call overheads
3900 */
3901 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3902 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3903 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3904 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3905 #endif
3906 
3907 #undef __FUNCT__
3908 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3909 /*@C
3910   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3911 
3912   Collective on Mat
3913 
3914   Input Parameters:
3915 + mat - the matrix
3916 . min - number of input rows
3917 . im - input rows
3918 . nin - number of input columns
3919 . in - input columns
3920 . v - numerical values input
3921 - addvin - INSERT_VALUES or ADD_VALUES
3922 
3923   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3924 
3925   Level: advanced
3926 
3927 .seealso:   MatSetValuesBlocked()
3928 @*/
3929 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3930 {
3931   /* convert input arguments to C version */
3932   Mat        mat  = *matin;
3933   PetscInt   m    = *min, n = *nin;
3934   InsertMode addv = *addvin;
3935 
3936   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3937   const MatScalar *value;
3938   MatScalar       *barray     = baij->barray;
3939   PetscBool       roworiented = baij->roworiented;
3940   PetscErrorCode  ierr;
3941   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3942   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3943   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3944 
3945   PetscFunctionBegin;
3946   /* tasks normally handled by MatSetValuesBlocked() */
3947   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3948 #if defined(PETSC_USE_DEBUG)
3949   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3950   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3951 #endif
3952   if (mat->assembled) {
3953     mat->was_assembled = PETSC_TRUE;
3954     mat->assembled     = PETSC_FALSE;
3955   }
3956   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3957 
3958 
3959   if (!barray) {
3960     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3961     baij->barray = barray;
3962   }
3963 
3964   if (roworiented) stepval = (n-1)*bs;
3965   else stepval = (m-1)*bs;
3966 
3967   for (i=0; i<m; i++) {
3968     if (im[i] < 0) continue;
3969 #if defined(PETSC_USE_DEBUG)
3970     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3971 #endif
3972     if (im[i] >= rstart && im[i] < rend) {
3973       row = im[i] - rstart;
3974       for (j=0; j<n; j++) {
3975         /* If NumCol = 1 then a copy is not required */
3976         if ((roworiented) && (n == 1)) {
3977           barray = (MatScalar*)v + i*bs2;
3978         } else if ((!roworiented) && (m == 1)) {
3979           barray = (MatScalar*)v + j*bs2;
3980         } else { /* Here a copy is required */
3981           if (roworiented) {
3982             value = v + i*(stepval+bs)*bs + j*bs;
3983           } else {
3984             value = v + j*(stepval+bs)*bs + i*bs;
3985           }
3986           for (ii=0; ii<bs; ii++,value+=stepval) {
3987             for (jj=0; jj<bs; jj++) {
3988               *barray++ = *value++;
3989             }
3990           }
3991           barray -=bs2;
3992         }
3993 
3994         if (in[j] >= cstart && in[j] < cend) {
3995           col  = in[j] - cstart;
3996           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3997         } else if (in[j] < 0) continue;
3998 #if defined(PETSC_USE_DEBUG)
3999         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4000 #endif
4001         else {
4002           if (mat->was_assembled) {
4003             if (!baij->colmap) {
4004               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4005             }
4006 
4007 #if defined(PETSC_USE_DEBUG)
4008 #if defined(PETSC_USE_CTABLE)
4009             { PetscInt data;
4010               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4011               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4012             }
4013 #else
4014             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4015 #endif
4016 #endif
4017 #if defined(PETSC_USE_CTABLE)
4018             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4019             col  = (col - 1)/bs;
4020 #else
4021             col = (baij->colmap[in[j]] - 1)/bs;
4022 #endif
4023             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4024               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4025               col  =  in[j];
4026             }
4027           } else col = in[j];
4028           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4029         }
4030       }
4031     } else {
4032       if (!baij->donotstash) {
4033         if (roworiented) {
4034           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4035         } else {
4036           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4037         }
4038       }
4039     }
4040   }
4041 
4042   /* task normally handled by MatSetValuesBlocked() */
4043   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
4046 
4047 #undef __FUNCT__
4048 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4049 /*@
4050      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4051          CSR format the local rows.
4052 
4053    Collective on MPI_Comm
4054 
4055    Input Parameters:
4056 +  comm - MPI communicator
4057 .  bs - the block size, only a block size of 1 is supported
4058 .  m - number of local rows (Cannot be PETSC_DECIDE)
4059 .  n - This value should be the same as the local size used in creating the
4060        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4061        calculated if N is given) For square matrices n is almost always m.
4062 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4063 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4064 .   i - row indices
4065 .   j - column indices
4066 -   a - matrix values
4067 
4068    Output Parameter:
4069 .   mat - the matrix
4070 
4071    Level: intermediate
4072 
4073    Notes:
4074        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4075      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4076      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4077 
4078        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4079 
4080 .keywords: matrix, aij, compressed row, sparse, parallel
4081 
4082 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4083           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4084 @*/
4085 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4086 {
4087   PetscErrorCode ierr;
4088 
4089   PetscFunctionBegin;
4090   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4091   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4092   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4093   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4094   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4095   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098