xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision aa2f1b4ed13ac3a331ce69b63eee5b761b5fc356)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
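/*
   Editorial usage sketch (not part of the PETSc sources): computing, on each process, the
   largest absolute entry of every locally owned row of an assembled MATMPIBAIJ matrix A,
   together with the global column index where it occurs.  Compiled out with #if 0 so it does
   not affect the library; the helper name ExampleRowMaxAbs is hypothetical.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       m,*idx;
  Vec            rowmax;

  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = VecCreateMPI(PetscObjectComm((PetscObject)A),m,PETSC_DECIDE,&rowmax);CHKERRQ(ierr);
  ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,rowmax,idx);CHKERRQ(ierr);   /* dispatches to MatGetRowMaxAbs_MPIBAIJ() */
  ierr = VecView(rowmax,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
  return 0;
}
#endif
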
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
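/*
   Editorial usage sketch (not part of the PETSc sources): the store/retrieve pair above is
   typically used when the same nonzero pattern is refilled many times, e.g. inside a nonlinear
   or time-stepping loop.  MatStoreValues() requires that no new nonzero locations be allowed.
   Compiled out with #if 0; the helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);     /* save a copy of the numerical values */
  /* ... modify the numerical values of A, keeping the nonzero pattern fixed ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);  /* restore the saved values */
  return 0;
}
#endif
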
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
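/*
   Editorial sketch (not part of the PETSc sources) of how the colmap built above is consulted:
   a global column gcol is first reduced to its global block column, colmap gives 1 + the first
   local column of that block in baij->B (or 0 if the block is not present), and the offset
   within the block is added back.  This mirrors the non-CTABLE lookup in MatSetValues_MPIBAIJ()
   below; the helper name is hypothetical and the code is compiled out with #if 0.
*/
#if 0
static PetscInt ExampleColmapLookup(const PetscInt *colmap,PetscInt bs,PetscInt gcol)
{
  PetscInt lcol = colmap[gcol/bs] - 1;   /* first local column of the block, or -1 */
  if (lcol < 0) return -1;               /* global block column not present in the off-diagonal part */
  return lcol + gcol%bs;                 /* local column within baij->B */
}
#endif
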
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
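/*
   Editorial sketch (not part of the PETSc sources) of the search strategy used by the two macros
   above: the block column indices of a block row are kept sorted, a short binary search narrows
   the window to at most three entries, and a linear scan then either finds the block or
   determines the insertion point.  Stand-alone illustration with hypothetical names, compiled
   out with #if 0.
*/
#if 0
static PetscInt ExampleFindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high-low > 3) {                 /* binary search on the sorted block columns */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {             /* short linear scan of the remaining window */
    if (rp[i] > bcol) break;             /* not present; i is the insertion point */
    if (rp[i] == bcol) return i;         /* found the existing block */
  }
  return -(i+1);                         /* encode the insertion point as a negative value */
}
#endif
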
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   if (v) PetscValidScalarPointer(v,6);
216   for (i=0; i<m; i++) {
217     if (im[i] < 0) continue;
218 #if defined(PETSC_USE_DEBUG)
219     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for (j=0; j<n; j++) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig) {
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined(PETSC_USE_CTABLE)
240             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
241             col  = col - 1;
242 #else
243             col = baij->colmap[in[j]/bs] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
246               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
247               col  =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249               B    = baij->B;
250               b    = (Mat_SeqBAIJ*)(B)->data;
251               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252               ba   =b->a;
253             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
254             else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j];
257           else             value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
264       if (!baij->donotstash) {
265         mat->assembled = PETSC_FALSE;
266         if (roworiented) {
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
268         } else {
269           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
270         }
271       }
272     }
273   }
274   PetscFunctionReturn(0);
275 }
276 
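/*
   Editorial usage sketch (not part of the PETSc sources): point-wise insertion into an MPIBAIJ
   matrix.  Rows owned by this process go directly into baij->A or baij->B via the macros above;
   rows owned by other processes are stashed and communicated during assembly.  All values below
   are illustrative; compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExamplePointwiseInsert(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,i,col;
  PetscScalar    v;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    col  = i;                         /* set the diagonal entry of each locally owned row */
    v    = 2.0;
    ierr = MatSetValues(A,1,&i,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
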
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
282   const PetscScalar *value;
283   MatScalar         *barray     = baij->barray;
284   PetscBool         roworiented = baij->roworiented;
285   PetscErrorCode    ierr;
286   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
287   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
288   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
289 
290   PetscFunctionBegin;
291   if (!barray) {
292     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
293     baij->barray = barray;
294   }
295 
296   if (roworiented) stepval = (n-1)*bs;
297   else stepval = (m-1)*bs;
298 
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If only one block column (row-oriented) or one block row (column-oriented) is being set, the block in v is already contiguous and a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if ((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend) {
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         } else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined(PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined(PETSC_USE_CTABLE)
349             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
355               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col  =  in[j];
357             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
358           } else col = in[j];
359           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
360         }
361       }
362     } else {
363       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
364       if (!baij->donotstash) {
365         if (roworiented) {
366           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
367         } else {
368           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
375 
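/*
   Editorial usage sketch (not part of the PETSc sources): inserting one bs x bs block with
   MatSetValuesBlocked().  The row and column indices are BLOCK indices, and with the default
   row-oriented storage the bs*bs values are laid out row by row.  The fill pattern below is
   illustrative; compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleBlockInsert(Mat A,PetscInt brow,PetscInt bcol)
{
  PetscErrorCode ierr;
  PetscInt       bs,i;
  PetscScalar    *vals;

  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  ierr = PetscMalloc(bs*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
  for (i=0; i<bs*bs; i++) vals[i] = (PetscScalar)i;   /* fill the dense bs x bs block */
  ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  return 0;
}
#endif
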
376 #define HASH_KEY 0.6180339887
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 #undef __FUNCT__
381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
383 {
384   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
385   PetscBool      roworiented = baij->roworiented;
386   PetscErrorCode ierr;
387   PetscInt       i,j,row,col;
388   PetscInt       rstart_orig=mat->rmap->rstart;
389   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
390   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
391   PetscReal      tmp;
392   MatScalar      **HD = baij->hd,value;
393 #if defined(PETSC_USE_DEBUG)
394   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
395 #endif
396 
397   PetscFunctionBegin;
398   if (v) PetscValidScalarPointer(v,6);
399   for (i=0; i<m; i++) {
400 #if defined(PETSC_USE_DEBUG)
401     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
402     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
403 #endif
404     row = im[i];
405     if (row >= rstart_orig && row < rend_orig) {
406       for (j=0; j<n; j++) {
407         col = in[j];
408         if (roworiented) value = v[i*n+j];
409         else             value = v[i+j*m];
410         /* Look up into the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
423             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
424           }
425         }
426 #else
427         if (HT[idx] != key) {
428           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
429           if (idx == size) {
430             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
431             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
432           }
433         }
434 #endif
435         /* A hash table entry was found, so insert the value at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else if (!baij->donotstash) {
440       if (roworiented) {
441         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
442       } else {
443         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
444       }
445     }
446   }
447 #if defined(PETSC_USE_DEBUG)
448   baij->ht_total_ct  = total_ct;
449   baij->ht_insert_ct = insert_ct;
450 #endif
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457 {
458   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
459   PetscBool         roworiented = baij->roworiented;
460   PetscErrorCode    ierr;
461   PetscInt          i,j,ii,jj,row,col;
462   PetscInt          rstart=baij->rstartbs;
463   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465   PetscReal         tmp;
466   MatScalar         **HD = baij->hd,*baij_a;
467   const PetscScalar *v_t,*value;
468 #if defined(PETSC_USE_DEBUG)
469   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470 #endif
471 
472   PetscFunctionBegin;
473   if (roworiented) stepval = (n-1)*bs;
474   else stepval = (m-1)*bs;
475 
476   for (i=0; i<m; i++) {
477 #if defined(PETSC_USE_DEBUG)
478     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480 #endif
481     row = im[i];
482     v_t = v + i*nbs2;
483     if (row >= rstart && row < rend) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486 
487         /* Look up into the Hash Table */
488         key = row*Nbs+col+1;
489         h1  = HASH(size,key,tmp);
490 
491         idx = h1;
492 #if defined(PETSC_USE_DEBUG)
493         total_ct++;
494         insert_ct++;
495         if (HT[idx] != key) {
496           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497           if (idx == size) {
498             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500           }
501         }
502 #else
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #endif
511         baij_a = HD[idx];
512         if (roworiented) {
513           /*value = v + i*(stepval+bs)*bs + j*bs;*/
514           /* value = v + (i*(stepval+bs)+j)*bs; */
515           value = v_t;
516           v_t  += bs;
517           if (addv == ADD_VALUES) {
518             for (ii=0; ii<bs; ii++,value+=stepval) {
519               for (jj=ii; jj<bs2; jj+=bs) {
520                 baij_a[jj] += *value++;
521               }
522             }
523           } else {
524             for (ii=0; ii<bs; ii++,value+=stepval) {
525               for (jj=ii; jj<bs2; jj+=bs) {
526                 baij_a[jj] = *value++;
527               }
528             }
529           }
530         } else {
531           value = v + j*(stepval+bs)*bs + i*bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534               for (jj=0; jj<bs; jj++) {
535                 baij_a[jj] += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540               for (jj=0; jj<bs; jj++) {
541                 baij_a[jj] = *value++;
542               }
543             }
544           }
545         }
546       }
547     } else {
548       if (!baij->donotstash) {
549         if (roworiented) {
550           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557 #if defined(PETSC_USE_DEBUG)
558   baij->ht_total_ct  = total_ct;
559   baij->ht_insert_ct = insert_ct;
560 #endif
561   PetscFunctionReturn(0);
562 }
563 
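/*
   Editorial usage sketch (not part of the PETSc sources): the *_HT insertion routines above are
   only used once the hash table has been built, which happens at the end of the first final
   assembly when the hash-table option is enabled.  A typical way to request this is shown below
   (the factor-setting call is hedged; it corresponds to the MatSetHashTableFactor_C method
   composed on this type).  Compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleEnableHashTable(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);  /* or -mat_use_hash_table */
  /* optionally adjust the table size: factor times the number of nonzero blocks */
  ierr = MatMPIBAIJSetHashTableFactor(A,1.99);CHKERRQ(ierr);
  return 0;
}
#endif
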
564 #undef __FUNCT__
565 #define __FUNCT__ "MatGetValues_MPIBAIJ"
566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
567 {
568   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
569   PetscErrorCode ierr;
570   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
571   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
572 
573   PetscFunctionBegin;
574   for (i=0; i<m; i++) {
575     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
576     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
577     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
578       row = idxm[i] - bsrstart;
579       for (j=0; j<n; j++) {
580         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
581         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
582         if (idxn[j] >= bscstart && idxn[j] < bscend) {
583           col  = idxn[j] - bscstart;
584           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
585         } else {
586           if (!baij->colmap) {
587             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588           }
589 #if defined(PETSC_USE_CTABLE)
590           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
591           data--;
592 #else
593           data = baij->colmap[idxn[j]/bs]-1;
594 #endif
595           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
596           else {
597             col  = data + idxn[j]%bs;
598             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599           }
600         }
601       }
602     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
603   }
604   PetscFunctionReturn(0);
605 }
606 
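/*
   Editorial usage sketch (not part of the PETSc sources): MatGetValues_MPIBAIJ() above only
   supports querying rows owned by the calling process, so the requested row indices must lie in
   the local ownership range.  Compiled out with #if 0; the helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleGetLocalEntry(Mat A,PetscInt i,PetscInt j,PetscScalar *val)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  if (i < rstart || i >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row must be locally owned");
  ierr = MatGetValues(A,1,&i,1,&j,val);CHKERRQ(ierr);   /* column j may be on- or off-diagonal */
  return 0;
}
#endif
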
607 #undef __FUNCT__
608 #define __FUNCT__ "MatNorm_MPIBAIJ"
609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
610 {
611   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
612   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
613   PetscErrorCode ierr;
614   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
615   PetscReal      sum = 0.0;
616   MatScalar      *v;
617 
618   PetscFunctionBegin;
619   if (baij->size == 1) {
620     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
621   } else {
622     if (type == NORM_FROBENIUS) {
623       v  = amat->a;
624       nz = amat->nz*bs2;
625       for (i=0; i<nz; i++) {
626         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
627       }
628       v  = bmat->a;
629       nz = bmat->nz*bs2;
630       for (i=0; i<nz; i++) {
631         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
632       }
633       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634       *nrm = PetscSqrtReal(*nrm);
635     } else if (type == NORM_1) { /* max column sum */
636       PetscReal *tmp,*tmp2;
637       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
638       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
639       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
640       v    = amat->a; jj = amat->j;
641       for (i=0; i<amat->nz; i++) {
642         for (j=0; j<bs; j++) {
643           col = bs*(cstart + *jj) + j; /* column index */
644           for (row=0; row<bs; row++) {
645             tmp[col] += PetscAbsScalar(*v);  v++;
646           }
647         }
648         jj++;
649       }
650       v = bmat->a; jj = bmat->j;
651       for (i=0; i<bmat->nz; i++) {
652         for (j=0; j<bs; j++) {
653           col = bs*garray[*jj] + j;
654           for (row=0; row<bs; row++) {
655             tmp[col] += PetscAbsScalar(*v); v++;
656           }
657         }
658         jj++;
659       }
660       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
661       *nrm = 0.0;
662       for (j=0; j<mat->cmap->N; j++) {
663         if (tmp2[j] > *nrm) *nrm = tmp2[j];
664       }
665       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
666     } else if (type == NORM_INFINITY) { /* max row sum */
667       PetscReal *sums;
668       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
669       sum  = 0.0;
670       for (j=0; j<amat->mbs; j++) {
671         for (row=0; row<bs; row++) sums[row] = 0.0;
672         v  = amat->a + bs2*amat->i[j];
673         nz = amat->i[j+1]-amat->i[j];
674         for (i=0; i<nz; i++) {
675           for (col=0; col<bs; col++) {
676             for (row=0; row<bs; row++) {
677               sums[row] += PetscAbsScalar(*v); v++;
678             }
679           }
680         }
681         v  = bmat->a + bs2*bmat->i[j];
682         nz = bmat->i[j+1]-bmat->i[j];
683         for (i=0; i<nz; i++) {
684           for (col=0; col<bs; col++) {
685             for (row=0; row<bs; row++) {
686               sums[row] += PetscAbsScalar(*v); v++;
687             }
688           }
689         }
690         for (row=0; row<bs; row++) {
691           if (sums[row] > sum) sum = sums[row];
692         }
693       }
694       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
695       ierr = PetscFree(sums);CHKERRQ(ierr);
696     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
697   }
698   PetscFunctionReturn(0);
699 }
700 
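/*
   Editorial usage sketch (not part of the PETSc sources): the three norms supported by
   MatNorm_MPIBAIJ() above.  NORM_FROBENIUS and NORM_1 reduce with a sum, NORM_INFINITY with a
   max, so all three are collective over the matrix communicator.  Compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);         /* maximum column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);  /* maximum row sum */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"Frobenius %g  1-norm %g  inf-norm %g\n",(double)fro,(double)one,(double)inf);CHKERRQ(ierr);
  return 0;
}
#endif
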
701 /*
702   Creates and fills the hash table that maps each (block row, block column)
703   key to the address of the corresponding block of values.
704   This table is created only once.
705   If new entries need to be added to the matrix
706   then the hash table has to be destroyed and recreated.
707 */
708 #undef __FUNCT__
709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
711 {
712   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
713   Mat            A     = baij->A,B=baij->B;
714   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
715   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
716   PetscErrorCode ierr;
717   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
718   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
719   PetscInt       *HT,key;
720   MatScalar      **HD;
721   PetscReal      tmp;
722 #if defined(PETSC_USE_INFO)
723   PetscInt ct=0,max=0;
724 #endif
725 
726   PetscFunctionBegin;
727   if (baij->ht) PetscFunctionReturn(0);
728 
729   baij->ht_size = (PetscInt)(factor*nz);
730   ht_size       = baij->ht_size;
731 
732   /* Allocate Memory for Hash Table */
733   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
734   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
735   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
736   HD   = baij->hd;
737   HT   = baij->ht;
738 
739   /* Loop Over A */
740   for (i=0; i<a->mbs; i++) {
741     for (j=ai[i]; j<ai[i+1]; j++) {
742       row = i+rstart;
743       col = aj[j]+cstart;
744 
745       key = row*Nbs + col + 1;
746       h1  = HASH(ht_size,key,tmp);
747       for (k=0; k<ht_size; k++) {
748         if (!HT[(h1+k)%ht_size]) {
749           HT[(h1+k)%ht_size] = key;
750           HD[(h1+k)%ht_size] = a->a + j*bs2;
751           break;
752 #if defined(PETSC_USE_INFO)
753         } else {
754           ct++;
755 #endif
756         }
757       }
758 #if defined(PETSC_USE_INFO)
759       if (k> max) max = k;
760 #endif
761     }
762   }
763   /* Loop Over B */
764   for (i=0; i<b->mbs; i++) {
765     for (j=bi[i]; j<bi[i+1]; j++) {
766       row = i+rstart;
767       col = garray[bj[j]];
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++) {
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = b->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786 
787   /* Print Summary */
788 #if defined(PETSC_USE_INFO)
789   for (i=0,j=0; i<ht_size; i++) {
790     if (HT[i]) j++;
791   }
792   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
793 #endif
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
799 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
800 {
801   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
802   PetscErrorCode ierr;
803   PetscInt       nstash,reallocs;
804   InsertMode     addv;
805 
806   PetscFunctionBegin;
807   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
808 
809   /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
810   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
811   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
812   mat->insertmode = addv; /* in case this processor had no cache */
813 
814   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
815   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
816   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
817   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
818   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
819   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
825 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
828   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
829   PetscErrorCode ierr;
830   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
831   PetscInt       *row,*col;
832   PetscBool      r1,r2,r3,other_disassembled;
833   MatScalar      *val;
834   InsertMode     addv = mat->insertmode;
835   PetscMPIInt    n;
836 
837   PetscFunctionBegin;
838   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
839   if (!baij->donotstash && !mat->nooffprocentries) {
840     while (1) {
841       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
842       if (!flg) break;
843 
844       for (i=0; i<n;) {
845         /* Now identify the consecutive vals belonging to the same row */
846         for (j=i,rstart=row[j]; j<n; j++) {
847           if (row[j] != rstart) break;
848         }
849         if (j < n) ncols = j-i;
850         else       ncols = n-i;
851         /* Now assemble all these values with a single function call */
852         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
853         i    = j;
854       }
855     }
856     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
857     /* Now process the block-stash. Since the values are stashed column-oriented,
858        temporarily clear the roworiented flags, and after MatSetValuesBlocked()
859        restore the original flags */
860     r1 = baij->roworiented;
861     r2 = a->roworiented;
862     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
863 
864     baij->roworiented = PETSC_FALSE;
865     a->roworiented    = PETSC_FALSE;
866 
867     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) {
875           if (row[j] != rstart) break;
876         }
877         if (j < n) ncols = j-i;
878         else       ncols = n-i;
879         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
880         i    = j;
881       }
882     }
883     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
884 
885     baij->roworiented = r1;
886     a->roworiented    = r2;
887 
888     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
889   }
890 
891   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
892   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
893 
894   /* determine if any processor has disassembled, if so we must
895      also disassemble ourselves, in order that we may reassemble. */
896   /*
897      if the nonzero structure of submatrix B cannot change then we know that
898      no processor disassembled, thus we can skip this stuff
899   */
900   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
901     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
902     if (mat->was_assembled && !other_disassembled) {
903       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
904     }
905   }
906 
907   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
908     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
909   }
910   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
911   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
912 
913 #if defined(PETSC_USE_INFO)
914   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
915     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
916 
917     baij->ht_total_ct  = 0;
918     baij->ht_insert_ct = 0;
919   }
920 #endif
921   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
922     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
923 
924     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
925     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
926   }
927 
928   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
929 
930   baij->rowvalues = 0;
931   PetscFunctionReturn(0);
932 }
933 
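/*
   Editorial usage sketch (not part of the PETSc sources): the assembly pair above is always
   called collectively.  MAT_FLUSH_ASSEMBLY is used when switching between ADD_VALUES and
   INSERT_VALUES; MAT_FINAL_ASSEMBLY must be used before the matrix can be applied.  Indices and
   values below are illustrative; compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleAssembly(Mat A,PetscInt row,PetscInt col)
{
  PetscErrorCode ierr;
  PetscScalar    v = 1.0;

  ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);   /* flush before changing insert mode */
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
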
934 #include <petscdraw.h>
935 #undef __FUNCT__
936 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
937 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
938 {
939   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
940   PetscErrorCode    ierr;
941   PetscMPIInt       size = baij->size,rank = baij->rank;
942   PetscInt          bs   = mat->rmap->bs;
943   PetscBool         iascii,isdraw;
944   PetscViewer       sviewer;
945   PetscViewerFormat format;
946 
947   PetscFunctionBegin;
948   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
949   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
950   if (iascii) {
951     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
952     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
953       MatInfo info;
954       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
955       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
956       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
957       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
958                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
959       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
961       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
962       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
963       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
965       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
966       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
967       PetscFunctionReturn(0);
968     } else if (format == PETSC_VIEWER_ASCII_INFO) {
969       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
972       PetscFunctionReturn(0);
973     }
974   }
975 
976   if (isdraw) {
977     PetscDraw draw;
978     PetscBool isnull;
979     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
980     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
981   }
982 
983   if (size == 1) {
984     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
985     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
986   } else {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to call to draw the matrix since the graphics waits are
1041        synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1046       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1047       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1048       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1049     }
1050     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1051     ierr = MatDestroy(&A);CHKERRQ(ierr);
1052   }
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1058 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1059 {
1060   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1061   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1062   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1063   PetscErrorCode ierr;
1064   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1065   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1066   int            fd;
1067   PetscScalar    *column_values;
1068   FILE           *file;
1069   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1070   PetscInt       message_count,flowcontrolcount;
1071 
1072   PetscFunctionBegin;
1073   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1074   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1075   nz   = bs2*(A->nz + B->nz);
1076   rlen = mat->rmap->n;
1077   if (!rank) {
1078     header[0] = MAT_FILE_CLASSID;
1079     header[1] = mat->rmap->N;
1080     header[2] = mat->cmap->N;
1081 
1082     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1083     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1084     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1085     /* get largest number of rows any processor has */
1086     range = mat->rmap->range;
1087     for (i=1; i<size; i++) {
1088       rlen = PetscMax(rlen,range[i+1] - range[i]);
1089     }
1090   } else {
1091     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1092   }
1093 
1094   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1095   /* compute the number of nonzero blocks in each block row */
1096   for (i=0; i<a->mbs; i++) {
1097     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1098   }
1099   /* store the row lengths to the file */
1100   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1101   if (!rank) {
1102     MPI_Status status;
1103     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1104     rlen = (range[1] - range[0])/bs;
1105     for (i=0; i<rlen; i++) {
1106       for (j=0; j<bs; j++) {
1107         row_lens[i*bs+j] = bs*crow_lens[i];
1108       }
1109     }
1110     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1111     for (i=1; i<size; i++) {
1112       rlen = (range[i+1] - range[i])/bs;
1113       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1114       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1115       for (k=0; k<rlen; k++) {
1116         for (j=0; j<bs; j++) {
1117           row_lens[k*bs+j] = bs*crow_lens[k];
1118         }
1119       }
1120       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1121     }
1122     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1123     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1124   } else {
1125     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1126     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1127     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1128   }
1129   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1130 
1131   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
1132      information needed to expand a block row into its individual rows. This requires more communication, but still no more than
1133      the communication needed for the nonzero values  */
1134   nzmax = nz; /* space the largest processor needs */
1135   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1136   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1137   cnt   = 0;
1138   for (i=0; i<a->mbs; i++) {
1139     pcnt = cnt;
1140     for (j=B->i[i]; j<B->i[i+1]; j++) {
1141       if ((col = garray[B->j[j]]) > cstart) break;
1142       for (l=0; l<bs; l++) {
1143         column_indices[cnt++] = bs*col+l;
1144       }
1145     }
1146     for (k=A->i[i]; k<A->i[i+1]; k++) {
1147       for (l=0; l<bs; l++) {
1148         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1149       }
1150     }
1151     for (; j<B->i[i+1]; j++) {
1152       for (l=0; l<bs; l++) {
1153         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1154       }
1155     }
1156     len = cnt - pcnt;
1157     for (k=1; k<bs; k++) {
1158       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1159       cnt += len;
1160     }
1161   }
1162   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1163 
1164   /* store the columns to the file */
1165   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1166   if (!rank) {
1167     MPI_Status status;
1168     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     for (i=1; i<size; i++) {
1170       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1171       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1172       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1173       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1176   } else {
1177     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1178     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1179     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1181   }
1182   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1183 
1184   /* load up the numerical values */
1185   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1186   cnt  = 0;
1187   for (i=0; i<a->mbs; i++) {
1188     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1189     for (j=B->i[i]; j<B->i[i+1]; j++) {
1190       if (garray[B->j[j]] > cstart) break;
1191       for (l=0; l<bs; l++) {
1192         for (ll=0; ll<bs; ll++) {
1193           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1194         }
1195       }
1196       cnt += bs;
1197     }
1198     for (k=A->i[i]; k<A->i[i+1]; k++) {
1199       for (l=0; l<bs; l++) {
1200         for (ll=0; ll<bs; ll++) {
1201           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1202         }
1203       }
1204       cnt += bs;
1205     }
1206     for (; j<B->i[i+1]; j++) {
1207       for (l=0; l<bs; l++) {
1208         for (ll=0; ll<bs; ll++) {
1209           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1210         }
1211       }
1212       cnt += bs;
1213     }
1214     cnt += (bs-1)*rlen;
1215   }
1216   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1217 
1218   /* store the column values to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     MPI_Status status;
1222     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1226       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230   } else {
1231     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1232     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1235   }
1236   ierr = PetscFree(column_values);CHKERRQ(ierr);
1237 
1238   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1239   if (file) {
1240     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1241   }
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatView_MPIBAIJ"
1247 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1248 {
1249   PetscErrorCode ierr;
1250   PetscBool      iascii,isdraw,issocket,isbinary;
1251 
1252   PetscFunctionBegin;
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1256   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1257   if (iascii || isdraw || issocket) {
1258     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1259   } else if (isbinary) {
1260     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
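/*
   Editorial usage sketch (not part of the PETSc sources): viewing an assembled MPIBAIJ matrix,
   first as an ASCII summary on stdout and then in the PETSc binary format handled by
   MatView_MPIBAIJ_Binary() above.  The file name is illustrative; compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleViewers(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    binary;

  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"baij.dat",FILE_MODE_WRITE,&binary);CHKERRQ(ierr);
  ierr = MatView(A,binary);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&binary);CHKERRQ(ierr);
  return 0;
}
#endif
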
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1268 {
1269   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1270   PetscErrorCode ierr;
1271 
1272   PetscFunctionBegin;
1273 #if defined(PETSC_USE_LOG)
1274   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1275 #endif
1276   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1277   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1278   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1279   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1280 #if defined(PETSC_USE_CTABLE)
1281   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1282 #else
1283   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1284 #endif
1285   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1286   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1287   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1290   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1291   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1292   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatMult_MPIBAIJ"
1309 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313   PetscInt       nt;
1314 
1315   PetscFunctionBegin;
1316   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1317   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1318   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1319   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
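  /* start scattering the off-process entries of xx, overlap that communication with the diagonal-block multiply, then add in the off-diagonal contribution */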
1320   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1322   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1329 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1330 {
1331   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1344 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1345 {
1346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348   PetscBool      merged;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
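  /* a "merged" scatter completes its communication inside VecScatterBegin(), which changes where the local multiply can be overlapped */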
1352   /* do nondiagonal part */
1353   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1354   if (!merged) {
1355     /* send it on its way */
1356     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1357     /* do local part */
1358     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1359     /* receive remote parts: note this assumes the values are not actually */
1360     /* inserted in yy until the next line */
1361     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1362   } else {
1363     /* do local part */
1364     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1365     /* send it on its way */
1366     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367     /* the values were actually received in the Begin(), but we still need to call the End() as a no-op */
1368     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1375 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1376 {
1377   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   /* do nondiagonal part */
1382   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1383   /* send it on its way */
1384   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   /* do local part */
1386   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1387   /* receive remote parts: note this assumes the values are not actually */
1388   /* inserted in zz until the VecScatterEnd() below, which is true for the */
1389   /* current scatter implementation but is perhaps not always true. */
1390   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 /*
1395   This only works correctly for square matrices where the subblock A->A is the
1396    diagonal block
1397 */
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1400 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1401 {
1402   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1407   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatScale_MPIBAIJ"
1413 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1414 {
1415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1420   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1426 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1427 {
1428   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1429   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1430   PetscErrorCode ierr;
1431   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1432   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1433   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1434 
1435   PetscFunctionBegin;
1436   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1437   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1438   mat->getrowactive = PETSC_TRUE;
1439 
1440   if (!mat->rowvalues && (idx || v)) {
1441     /*
1442         allocate enough space to hold information from the longest row.
1443     */
1444     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1445     PetscInt    max = 1,mbs = mat->mbs,tmp;
1446     for (i=0; i<mbs; i++) {
1447       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1448       if (max < tmp) max = tmp;
1449     }
1450     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1451   }
1452   lrow = row - brstart;
1453 
1454   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1455   if (!v)   {pvA = 0; pvB = 0;}
1456   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1457   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1458   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1459   nztot = nzA + nzB;
1460 
1461   cmap = mat->garray;
1462   if (v  || idx) {
1463     if (nztot) {
1464       /* Sort by increasing column numbers, assuming A and B already sorted */
1465       PetscInt imark = -1;
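      /* imark counts the leading off-diagonal (B) entries whose global block column lies before the diagonal block */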
1466       if (v) {
1467         *v = v_p = mat->rowvalues;
1468         for (i=0; i<nzB; i++) {
1469           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1470           else break;
1471         }
1472         imark = i;
1473         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1474         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1475       }
1476       if (idx) {
1477         *idx = idx_p = mat->rowindices;
1478         if (imark > -1) {
1479           for (i=0; i<imark; i++) {
1480             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1481           }
1482         } else {
1483           for (i=0; i<nzB; i++) {
1484             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485             else break;
1486           }
1487           imark = i;
1488         }
1489         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1490     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1491       }
1492     } else {
1493       if (idx) *idx = 0;
1494       if (v)   *v   = 0;
1495     }
1496   }
1497   *nz  = nztot;
1498   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1499   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1505 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1506 {
1507   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1508 
1509   PetscFunctionBegin;
1510   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1511   baij->getrowactive = PETSC_FALSE;
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1517 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1518 {
1519   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1524   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1530 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1531 {
1532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1533   Mat            A  = a->A,B = a->B;
1534   PetscErrorCode ierr;
1535   PetscReal      isend[5],irecv[5];
1536 
1537   PetscFunctionBegin;
1538   info->block_size = (PetscReal)matin->rmap->bs;
1539 
1540   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1541 
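  /* accumulate the diagonal block's counts here; the off-diagonal block's counts are added below */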
1542   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1543   isend[3] = info->memory;  isend[4] = info->mallocs;
1544 
1545   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1546 
1547   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1548   isend[3] += info->memory;  isend[4] += info->mallocs;
1549 
1550   if (flag == MAT_LOCAL) {
1551     info->nz_used      = isend[0];
1552     info->nz_allocated = isend[1];
1553     info->nz_unneeded  = isend[2];
1554     info->memory       = isend[3];
1555     info->mallocs      = isend[4];
1556   } else if (flag == MAT_GLOBAL_MAX) {
1557     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1558 
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else if (flag == MAT_GLOBAL_SUM) {
1565     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1566 
1567     info->nz_used      = irecv[0];
1568     info->nz_allocated = irecv[1];
1569     info->nz_unneeded  = irecv[2];
1570     info->memory       = irecv[3];
1571     info->mallocs      = irecv[4];
1572   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1573   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1574   info->fill_ratio_needed = 0;
1575   info->factor_mallocs    = 0;
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1581 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1582 {
1583   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   switch (op) {
1588   case MAT_NEW_NONZERO_LOCATIONS:
1589   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1590   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1591   case MAT_KEEP_NONZERO_PATTERN:
1592   case MAT_NEW_NONZERO_LOCATION_ERR:
1593     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1594     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1595     break;
1596   case MAT_ROW_ORIENTED:
1597     a->roworiented = flg;
1598 
1599     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1600     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1601     break;
1602   case MAT_NEW_DIAGONALS:
1603     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1604     break;
1605   case MAT_IGNORE_OFF_PROC_ENTRIES:
1606     a->donotstash = flg;
1607     break;
1608   case MAT_USE_HASH_TABLE:
1609     a->ht_flag = flg;
1610     break;
1611   case MAT_SYMMETRIC:
1612   case MAT_STRUCTURALLY_SYMMETRIC:
1613   case MAT_HERMITIAN:
1614   case MAT_SYMMETRY_ETERNAL:
1615     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1616     break;
1617   default:
1618     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1619   }
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1625 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1626 {
1627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1628   Mat_SeqBAIJ    *Aloc;
1629   Mat            B;
1630   PetscErrorCode ierr;
1631   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1632   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1633   MatScalar      *a;
1634 
1635   PetscFunctionBegin;
1636   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1637   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1638     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1639     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1640     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1641     /* Do not know preallocation information, but must set block size */
1642     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1643   } else {
1644     B = *matout;
1645   }
1646 
1647   /* copy over the A part */
1648   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1649   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1650   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
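  /* rvals holds the bs global point rows of block row i of A; they become the column indices of the transpose */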
1651 
1652   for (i=0; i<mbs; i++) {
1653     rvals[0] = bs*(baij->rstartbs + i);
1654     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1655     for (j=ai[i]; j<ai[i+1]; j++) {
1656       col = (baij->cstartbs+aj[j])*bs;
1657       for (k=0; k<bs; k++) {
1658         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1659 
1660         col++; a += bs;
1661       }
1662     }
1663   }
1664   /* copy over the B part */
1665   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1666   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1667   for (i=0; i<mbs; i++) {
1668     rvals[0] = bs*(baij->rstartbs + i);
1669     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1670     for (j=ai[i]; j<ai[i+1]; j++) {
1671       col = baij->garray[aj[j]]*bs;
1672       for (k=0; k<bs; k++) {
1673         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1674         col++;
1675         a += bs;
1676       }
1677     }
1678   }
1679   ierr = PetscFree(rvals);CHKERRQ(ierr);
1680   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682 
1683   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1684   else {
1685     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1692 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1693 {
1694   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1695   Mat            a     = baij->A,b = baij->B;
1696   PetscErrorCode ierr;
1697   PetscInt       s1,s2,s3;
1698 
1699   PetscFunctionBegin;
1700   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1701   if (rr) {
1702     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1703     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1704     /* Overlap communication with computation. */
1705     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1706   }
1707   if (ll) {
1708     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1709     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1710     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1711   }
1712   /* scale the diagonal block */
1713   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1714 
1715   if (rr) {
1716     /* Do a scatter end and then right scale the off-diagonal block */
1717     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1718     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1725 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1726 {
1727   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1728   PetscInt      *owners = A->rmap->range;
1729   PetscInt       n      = A->rmap->n;
1730   PetscMPIInt    size   = l->size;
1731   PetscSF        sf;
1732   PetscInt      *lrows;
1733   PetscSFNode   *rrows;
1734   PetscInt       lastidx = -1, r, p = 0, len = 0;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   /* Create SF where leaves are input rows and roots are owned rows */
1739   ierr = PetscMalloc(n * sizeof(PetscInt), &lrows);CHKERRQ(ierr);
1740   for (r = 0; r < n; ++r) lrows[r] = -1;
1741   ierr = PetscMalloc(N * sizeof(PetscSFNode), &rrows);CHKERRQ(ierr);
1742   for (r = 0; r < N; ++r) {
1743     const PetscInt idx   = rows[r];
1744     PetscBool      found = PETSC_FALSE;
1745     /* Trick for efficient searching when the requested rows are sorted */
1746     if (lastidx > idx) p = 0;
1747     lastidx = idx;
1748     for (; p < size; ++p) {
1749       if (idx >= owners[p] && idx < owners[p+1]) {
1750         rrows[r].rank  = p;
1751         rrows[r].index = rows[r] - owners[p];
1752         found = PETSC_TRUE;
1753         break;
1754       }
1755     }
1756     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1757   }
1758   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1759   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1760   /* Collect flags for rows to be zeroed */
1761   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1762   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1763   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1764   /* Compress and put in row numbers */
1765   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1766   /* fix right hand side if needed */
1767   if (x && b) {
1768     const PetscScalar *xx;
1769     PetscScalar       *bb;
1770 
1771     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1772     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1773     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1774     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1775     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1776   }
1777 
1778   /* actually zap the local rows */
1779   /*
1780         Zero the required rows. If the "diagonal block" of the matrix
1781      is square and the user wishes to set the diagonal we use separate
1782      code so that MatSetValues() is not called for each diagonal entry,
1783      which would allocate new memory on the fly (many mallocs) and slow
1784      things down.
1785   */
1786   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1787   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1788   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1789     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1790   } else if (diag != 0.0) {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1792     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1793        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1794     for (r = 0; r < len; ++r) {
1795       const PetscInt row = lrows[r] + A->rmap->rstart;
1796       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1797     }
1798     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1799     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1800   } else {
1801     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1802   }
1803   ierr = PetscFree(lrows);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1809 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1810 {
1811   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1812   PetscErrorCode ierr;
1813 
1814   PetscFunctionBegin;
1815   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatEqual_MPIBAIJ"
1823 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1824 {
1825   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1826   Mat            a,b,c,d;
1827   PetscBool      flg;
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   a = matA->A; b = matA->B;
1832   c = matB->A; d = matB->B;
1833 
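  /* the matrices are equal only if both the diagonal and off-diagonal local blocks agree on every process */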
1834   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1835   if (flg) {
1836     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1837   }
1838   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatCopy_MPIBAIJ"
1844 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1845 {
1846   PetscErrorCode ierr;
1847   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1848   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1849 
1850   PetscFunctionBegin;
1851   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1852   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1853     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1854   } else {
1855     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1856     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1863 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1864 {
1865   PetscErrorCode ierr;
1866 
1867   PetscFunctionBegin;
1868   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1874 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1875 {
1876   PetscErrorCode ierr;
1877   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1878   PetscBLASInt   bnz,one=1;
1879   Mat_SeqBAIJ    *x,*y;
1880 
1881   PetscFunctionBegin;
1882   if (str == SAME_NONZERO_PATTERN) {
1883     PetscScalar alpha = a;
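    /* with identical nonzero patterns the value arrays of X and Y line up entry for entry, so Y += a*X reduces to two BLAS axpy calls */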
1884     x    = (Mat_SeqBAIJ*)xx->A->data;
1885     y    = (Mat_SeqBAIJ*)yy->A->data;
1886     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1887     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1888     x    = (Mat_SeqBAIJ*)xx->B->data;
1889     y    = (Mat_SeqBAIJ*)yy->B->data;
1890     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1891     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1892   } else {
1893     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1900 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1901 {
1902   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1907   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1913 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1914 {
1915   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1920   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1926 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1927 {
1928   PetscErrorCode ierr;
1929   IS             iscol_local;
1930   PetscInt       csize;
1931 
1932   PetscFunctionBegin;
1933   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1934   if (call == MAT_REUSE_MATRIX) {
1935     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1936     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1937   } else {
1938     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1939   }
1940   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1941   if (call == MAT_INITIAL_MATRIX) {
1942     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1943     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1950 /*
1951   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
1952   copy, and then the final result obtained by concatenating the local matrices.
1953   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1954 */
1955 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1956 {
1957   PetscErrorCode ierr;
1958   PetscMPIInt    rank,size;
1959   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1960   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
1961   Mat            M,Mreuse;
1962   MatScalar      *vwork,*aa;
1963   MPI_Comm       comm;
1964   IS             isrow_new, iscol_new;
1965   PetscBool      idflag,allrows, allcols;
1966   Mat_SeqBAIJ    *aij;
1967 
1968   PetscFunctionBegin;
1969   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1970   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1972   /* The compression and expansion should be avoided. It does not report
1973      errors and may change the indices, hence it is buggy */
1974   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1975   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1976 
1977   /* Check for special case: each processor gets entire matrix columns */
1978   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
1979   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
1980   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
1981   else allcols = PETSC_FALSE;
1982 
1983   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
1984   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
1985   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
1986   else allrows = PETSC_FALSE;
1987 
1988   if (call ==  MAT_REUSE_MATRIX) {
1989     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1990     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1991     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
1992   } else {
1993     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
1994   }
1995   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1996   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1997   /*
1998       m - number of local rows
1999       n - number of columns (same on all processors)
2000       rstart - first row in new global matrix generated
2001   */
2002   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2003   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2004   m    = m/bs;
2005   n    = n/bs;
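  /* Mreuse is a sequential BAIJ matrix; convert its point (scalar) sizes to block sizes */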
2006 
2007   if (call == MAT_INITIAL_MATRIX) {
2008     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2009     ii  = aij->i;
2010     jj  = aij->j;
2011 
2012     /*
2013         Determine the number of non-zeros in the diagonal and off-diagonal
2014         portions of the matrix in order to do correct preallocation
2015     */
2016 
2017     /* first get start and end of "diagonal" columns */
2018     if (csize == PETSC_DECIDE) {
2019       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2020       if (mglobal == n*bs) { /* square matrix */
2021         nlocal = m;
2022       } else {
2023         nlocal = n/size + ((n % size) > rank);
2024       }
2025     } else {
2026       nlocal = csize/bs;
2027     }
2028     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2029     rstart = rend - nlocal;
2030     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2031 
2032     /* next, compute all the lengths */
2033     ierr  = PetscMalloc2(m+1,PetscInt,&dlens,m+1,PetscInt,&olens);CHKERRQ(ierr);
2034     for (i=0; i<m; i++) {
2035       jend = ii[i+1] - ii[i];
2036       olen = 0;
2037       dlen = 0;
2038       for (j=0; j<jend; j++) {
2039         if (*jj < rstart || *jj >= rend) olen++;
2040         else dlen++;
2041         jj++;
2042       }
2043       olens[i] = olen;
2044       dlens[i] = dlen;
2045     }
2046     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2047     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2048     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2049     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2050     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2051   } else {
2052     PetscInt ml,nl;
2053 
2054     M    = *newmat;
2055     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2056     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2057     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2058     /*
2059          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2060        rather than the slower MatSetValues().
2061     */
2062     M->was_assembled = PETSC_TRUE;
2063     M->assembled     = PETSC_FALSE;
2064   }
2065   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2066   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2067   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2068   ii   = aij->i;
2069   jj   = aij->j;
2070   aa   = aij->a;
2071   for (i=0; i<m; i++) {
2072     row   = rstart/bs + i;
2073     nz    = ii[i+1] - ii[i];
2074     cwork = jj;     jj += nz;
2075     vwork = aa;     aa += nz*bs*bs;
2076     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2077   }
2078 
2079   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   *newmat = M;
2082 
2083   /* save submatrix used in processor for next request */
2084   if (call ==  MAT_INITIAL_MATRIX) {
2085     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2086     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2087   }
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatPermute_MPIBAIJ"
2093 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2094 {
2095   MPI_Comm       comm,pcomm;
2096   PetscInt       clocal_size,nrows;
2097   const PetscInt *rows;
2098   PetscMPIInt    size;
2099   IS             crowp,lcolp;
2100   PetscErrorCode ierr;
2101 
2102   PetscFunctionBegin;
2103   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2104   /* make a collective version of 'rowp' */
2105   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2106   if (pcomm==comm) {
2107     crowp = rowp;
2108   } else {
2109     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2110     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2111     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2112     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2113   }
2114   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2115   /* make a local version of 'colp' */
2116   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2117   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2118   if (size==1) {
2119     lcolp = colp;
2120   } else {
2121     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2122   }
2123   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2124   /* now we just get the submatrix */
2125   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2126   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2127   /* clean up */
2128   if (pcomm!=comm) {
2129     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2130   }
2131   if (size>1) {
2132     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2133   }
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2139 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2140 {
2141   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2142   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2143 
2144   PetscFunctionBegin;
2145   if (nghosts) *nghosts = B->nbs;
2146   if (ghosts) *ghosts = baij->garray;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2152 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2153 {
2154   Mat            B;
2155   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2156   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2157   Mat_SeqAIJ     *b;
2158   PetscErrorCode ierr;
2159   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2160   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2161   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2162 
2163   PetscFunctionBegin;
2164   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2165   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2166 
2167   /* ----------------------------------------------------------------
2168      Tell every processor the number of nonzeros per row
2169   */
2170   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2171   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2172     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2173   }
2174   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2175   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2176   displs    = recvcounts + size;
2177   for (i=0; i<size; i++) {
2178     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2179     displs[i]     = A->rmap->range[i]/bs;
2180   }
2181 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2182   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2183 #else
2184   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2185 #endif
2186   /* ---------------------------------------------------------------
2187      Create the sequential matrix of the same type as the local block diagonal
2188   */
2189   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2190   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2191   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2192   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2193   b    = (Mat_SeqAIJ*)B->data;
2194 
2195   /*--------------------------------------------------------------------
2196     Copy my part of matrix column indices over
2197   */
2198   sendcount  = ad->nz + bd->nz;
2199   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2200   a_jsendbuf = ad->j;
2201   b_jsendbuf = bd->j;
2202   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2203   cnt        = 0;
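  /* for each block row: off-diagonal (bd) columns left of the diagonal block, then the diagonal (ad) columns, then the rest; garray[] maps bd's compressed column numbers to global block columns */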
2204   for (i=0; i<n; i++) {
2205 
2206     /* put in lower diagonal portion */
2207     m = bd->i[i+1] - bd->i[i];
2208     while (m > 0) {
2209       /* is it above diagonal (in bd (compressed) numbering) */
2210       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2211       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2212       m--;
2213     }
2214 
2215     /* put in diagonal portion */
2216     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2217       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2218     }
2219 
2220     /* put in upper diagonal portion */
2221     while (m-- > 0) {
2222       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2223     }
2224   }
2225   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2226 
2227   /*--------------------------------------------------------------------
2228     Gather all column indices to all processors
2229   */
2230   for (i=0; i<size; i++) {
2231     recvcounts[i] = 0;
2232     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2233       recvcounts[i] += lens[j];
2234     }
2235   }
2236   displs[0] = 0;
2237   for (i=1; i<size; i++) {
2238     displs[i] = displs[i-1] + recvcounts[i-1];
2239   }
2240 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2241   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2242 #else
2243   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2244 #endif
2245   /*--------------------------------------------------------------------
2246     Assemble the matrix into usable form (note the numerical values are not yet set)
2247   */
2248   /* set the b->ilen (length of each row) values */
2249   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2250   /* set the b->i indices */
2251   b->i[0] = 0;
2252   for (i=1; i<=A->rmap->N/bs; i++) {
2253     b->i[i] = b->i[i-1] + lens[i-1];
2254   }
2255   ierr = PetscFree(lens);CHKERRQ(ierr);
2256   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2257   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2259 
2260   if (A->symmetric) {
2261     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2262   } else if (A->hermitian) {
2263     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2264   } else if (A->structurally_symmetric) {
2265     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2266   }
2267   *newmat = B;
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatSOR_MPIBAIJ"
2273 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2274 {
2275   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2276   PetscErrorCode ierr;
2277   Vec            bb1 = 0;
2278 
2279   PetscFunctionBegin;
2280   if (flag == SOR_APPLY_UPPER) {
2281     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2282     PetscFunctionReturn(0);
2283   }
2284 
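  /* bb1 workspace (bb - B*x) is needed whenever a sweep starts from the current x: more than one iteration or a nonzero initial guess */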
2285   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2286     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2287   }
2288 
2289   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2290     if (flag & SOR_ZERO_INITIAL_GUESS) {
2291       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2292       its--;
2293     }
2294 
2295     while (its--) {
2296       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2297       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2298 
2299       /* update rhs: bb1 = bb - B*x */
2300       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2301       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2302 
2303       /* local sweep */
2304       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2305     }
2306   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2307     if (flag & SOR_ZERO_INITIAL_GUESS) {
2308       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2309       its--;
2310     }
2311     while (its--) {
2312       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2313       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2314 
2315       /* update rhs: bb1 = bb - B*x */
2316       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2317       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2318 
2319       /* local sweep */
2320       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2321     }
2322   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2323     if (flag & SOR_ZERO_INITIAL_GUESS) {
2324       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2325       its--;
2326     }
2327     while (its--) {
2328       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2329       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2330 
2331       /* update rhs: bb1 = bb - B*x */
2332       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2333       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2334 
2335       /* local sweep */
2336       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2337     }
2338   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2339 
2340   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2346 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2347 {
2348   PetscErrorCode ierr;
2349   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2350   PetscInt       N,i,*garray = aij->garray;
2351   PetscInt       ib,jb,bs = A->rmap->bs;
2352   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2353   MatScalar      *a_val = a_aij->a;
2354   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2355   MatScalar      *b_val = b_aij->a;
2356   PetscReal      *work;
2357 
2358   PetscFunctionBegin;
2359   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2360   ierr = PetscMalloc(N*sizeof(PetscReal),&work);CHKERRQ(ierr);
2361   ierr = PetscMemzero(work,N*sizeof(PetscReal));CHKERRQ(ierr);
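  /* run over all stored bs x bs blocks of the diagonal (A) and off-diagonal (B) parts; blocks are stored column-major, so for block column jb the inner ib loop runs over the rows of that column */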
2362   if (type == NORM_2) {
2363     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2364       for (jb=0; jb<bs; jb++) {
2365         for (ib=0; ib<bs; ib++) {
2366           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2367           a_val++;
2368         }
2369       }
2370     }
2371     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2372       for (jb=0; jb<bs; jb++) {
2373         for (ib=0; ib<bs; ib++) {
2374           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2375           b_val++;
2376         }
2377       }
2378     }
2379   } else if (type == NORM_1) {
2380     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2381       for (jb=0; jb<bs; jb++) {
2382         for (ib=0; ib<bs; ib++) {
2383           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2384           a_val++;
2385         }
2386       }
2387     }
2388     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2389       for (jb=0; jb<bs; jb++) {
2390        for (ib=0; ib<bs; ib++) {
2391           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2392           b_val++;
2393         }
2394       }
2395     }
2396   } else if (type == NORM_INFINITY) {
2397     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2398       for (jb=0; jb<bs; jb++) {
2399         for (ib=0; ib<bs; ib++) {
2400           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2401           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2402           a_val++;
2403         }
2404       }
2405     }
2406     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2407       for (jb=0; jb<bs; jb++) {
2408         for (ib=0; ib<bs; ib++) {
2409           int col = garray[b_aij->j[i]] * bs + jb;
2410           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2411           b_val++;
2412         }
2413       }
2414     }
2415   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2416   if (type == NORM_INFINITY) {
2417     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2418   } else {
2419     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2420   }
2421   ierr = PetscFree(work);CHKERRQ(ierr);
2422   if (type == NORM_2) {
2423     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2424   }
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2430 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2431 {
2432   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2433   PetscErrorCode ierr;
2434 
2435   PetscFunctionBegin;
2436   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 
2441 /* -------------------------------------------------------------------*/
2442 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2443                                        MatGetRow_MPIBAIJ,
2444                                        MatRestoreRow_MPIBAIJ,
2445                                        MatMult_MPIBAIJ,
2446                                 /* 4*/ MatMultAdd_MPIBAIJ,
2447                                        MatMultTranspose_MPIBAIJ,
2448                                        MatMultTransposeAdd_MPIBAIJ,
2449                                        0,
2450                                        0,
2451                                        0,
2452                                 /*10*/ 0,
2453                                        0,
2454                                        0,
2455                                        MatSOR_MPIBAIJ,
2456                                        MatTranspose_MPIBAIJ,
2457                                 /*15*/ MatGetInfo_MPIBAIJ,
2458                                        MatEqual_MPIBAIJ,
2459                                        MatGetDiagonal_MPIBAIJ,
2460                                        MatDiagonalScale_MPIBAIJ,
2461                                        MatNorm_MPIBAIJ,
2462                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2463                                        MatAssemblyEnd_MPIBAIJ,
2464                                        MatSetOption_MPIBAIJ,
2465                                        MatZeroEntries_MPIBAIJ,
2466                                 /*24*/ MatZeroRows_MPIBAIJ,
2467                                        0,
2468                                        0,
2469                                        0,
2470                                        0,
2471                                 /*29*/ MatSetUp_MPIBAIJ,
2472                                        0,
2473                                        0,
2474                                        0,
2475                                        0,
2476                                 /*34*/ MatDuplicate_MPIBAIJ,
2477                                        0,
2478                                        0,
2479                                        0,
2480                                        0,
2481                                 /*39*/ MatAXPY_MPIBAIJ,
2482                                        MatGetSubMatrices_MPIBAIJ,
2483                                        MatIncreaseOverlap_MPIBAIJ,
2484                                        MatGetValues_MPIBAIJ,
2485                                        MatCopy_MPIBAIJ,
2486                                 /*44*/ 0,
2487                                        MatScale_MPIBAIJ,
2488                                        0,
2489                                        0,
2490                                        0,
2491                                 /*49*/ 0,
2492                                        0,
2493                                        0,
2494                                        0,
2495                                        0,
2496                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2497                                        0,
2498                                        MatSetUnfactored_MPIBAIJ,
2499                                        MatPermute_MPIBAIJ,
2500                                        MatSetValuesBlocked_MPIBAIJ,
2501                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2502                                        MatDestroy_MPIBAIJ,
2503                                        MatView_MPIBAIJ,
2504                                        0,
2505                                        0,
2506                                 /*64*/ 0,
2507                                        0,
2508                                        0,
2509                                        0,
2510                                        0,
2511                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                 /*74*/ 0,
2517                                        MatFDColoringApply_BAIJ,
2518                                        0,
2519                                        0,
2520                                        0,
2521                                 /*79*/ 0,
2522                                        0,
2523                                        0,
2524                                        0,
2525                                        MatLoad_MPIBAIJ,
2526                                 /*84*/ 0,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                        0,
2531                                 /*89*/ 0,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                        0,
2536                                 /*94*/ 0,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                        0,
2541                                 /*99*/ 0,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                        0,
2546                                 /*104*/0,
2547                                        MatRealPart_MPIBAIJ,
2548                                        MatImaginaryPart_MPIBAIJ,
2549                                        0,
2550                                        0,
2551                                 /*109*/0,
2552                                        0,
2553                                        0,
2554                                        0,
2555                                        0,
2556                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2557                                        0,
2558                                        MatGetGhosts_MPIBAIJ,
2559                                        0,
2560                                        0,
2561                                 /*119*/0,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                        MatGetMultiProcBlock_MPIBAIJ,
2566                                 /*124*/0,
2567                                        MatGetColumnNorms_MPIBAIJ,
2568                                        MatInvertBlockDiagonal_MPIBAIJ,
2569                                        0,
2570                                        0,
2571                                /*129*/ 0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                        0,
2576                                /*134*/ 0,
2577                                        0,
2578                                        0,
2579                                        0,
2580                                        0,
2581                                /*139*/ 0,
2582                                        0,
2583                                        0,
2584                                        MatFDColoringSetUp_MPIXAIJ
2585 };
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2589 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2590 {
2591   PetscFunctionBegin;
2592   *a = ((Mat_MPIBAIJ*)A->data)->A;
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2600 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2601 {
2602   PetscInt       m,rstart,cstart,cend;
2603   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2604   const PetscInt *JJ    =0;
2605   PetscScalar    *values=0;
2606   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2607   PetscErrorCode ierr;
2608 
2609   PetscFunctionBegin;
2610   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2611   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2612   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2613   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2614   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2615   m      = B->rmap->n/bs;
2616   rstart = B->rmap->rstart/bs;
2617   cstart = B->cmap->rstart/bs;
2618   cend   = B->cmap->rend/bs;
2619 
2620   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2621   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
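  /* count, for each local block row, the block columns inside the diagonal range [cstart,cend) (d_nnz) and outside it (o_nnz); this relies on jj being sorted within each row */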
2622   for (i=0; i<m; i++) {
2623     nz = ii[i+1] - ii[i];
2624     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2625     nz_max = PetscMax(nz_max,nz);
2626     JJ     = jj + ii[i];
2627     for (j=0; j<nz; j++) {
2628       if (*JJ >= cstart) break;
2629       JJ++;
2630     }
2631     d = 0;
2632     for (; j<nz; j++) {
2633       if (*JJ++ >= cend) break;
2634       d++;
2635     }
2636     d_nnz[i] = d;
2637     o_nnz[i] = nz - d;
2638   }
2639   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2640   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2641 
2642   values = (PetscScalar*)V;
2643   if (!values) {
2644     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2645     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2646   }
2647   for (i=0; i<m; i++) {
2648     PetscInt          row    = i + rstart;
2649     PetscInt          ncols  = ii[i+1] - ii[i];
2650     const PetscInt    *icols = jj + ii[i];
2651     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2652       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2653       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2654     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2655       PetscInt j;
2656       for (j=0; j<ncols; j++) {
2657         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2658         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2659       }
2660     }
2661   }
2662 
2663   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2664   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2666   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2672 /*@C
2673    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2674    (the default parallel PETSc format).
2675 
2676    Collective on MPI_Comm
2677 
2678    Input Parameters:
2679 +  A - the matrix
2680 .  bs - the block size
2681 .  i - the indices into j for the start of each local row (starts with zero)
2682 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2683 -  v - optional values in the matrix
2684 
2685    Level: developer
2686 
2687    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2688    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2689    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2690    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2691    block column and the second index is over columns within a block.
2692 
2693 .keywords: matrix, aij, compressed row, sparse, parallel
2694 
2695 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2696 @*/
2697 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2698 {
2699   PetscErrorCode ierr;
2700 
2701   PetscFunctionBegin;
2702   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2703   PetscValidType(B,1);
2704   PetscValidLogicalCollectiveInt(B,bs,2);
2705   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2706   PetscFunctionReturn(0);
2707 }
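/*
   Editorial sketch (not part of the PETSc source): a minimal, hypothetical use of
   MatMPIBAIJSetPreallocationCSR().  Here comm, bs, mb, nb, ii, jj and vals are all
   assumed to be supplied by the caller: mb and nb are the numbers of locally owned
   block rows and block columns, ii holds mb+1 block-row offsets into jj (with ii[0] == 0),
   jj holds the global block-column indices sorted within each block row, and vals
   (which may be NULL) holds bs*bs values per block.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mb*bs,nb*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vals);CHKERRQ(ierr);

   Passing NULL for vals preallocates and assembles the given block structure with
   zero entries; values can then be set at those locations with MatSetValuesBlocked().
*/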
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2711 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2712 {
2713   Mat_MPIBAIJ    *b;
2714   PetscErrorCode ierr;
2715   PetscInt       i;
2716 
2717   PetscFunctionBegin;
2718   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2719   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2720   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2721   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2722   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2723 
2724   if (d_nnz) {
2725     for (i=0; i<B->rmap->n/bs; i++) {
2726       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2727     }
2728   }
2729   if (o_nnz) {
2730     for (i=0; i<B->rmap->n/bs; i++) {
2731       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2732     }
2733   }
2734 
2735   b      = (Mat_MPIBAIJ*)B->data;
2736   b->bs2 = bs*bs;
2737   b->mbs = B->rmap->n/bs;
2738   b->nbs = B->cmap->n/bs;
2739   b->Mbs = B->rmap->N/bs;
2740   b->Nbs = B->cmap->N/bs;
2741 
2742   for (i=0; i<=b->size; i++) {
2743     b->rangebs[i] = B->rmap->range[i]/bs;
2744   }
2745   b->rstartbs = B->rmap->rstart/bs;
2746   b->rendbs   = B->rmap->rend/bs;
2747   b->cstartbs = B->cmap->rstart/bs;
2748   b->cendbs   = B->cmap->rend/bs;
2749 
2750   if (!B->preallocated) {
2751     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2752     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2753     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2754     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2755     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2756     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2757     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2758     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2759     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2760   }
2761 
2762   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2763   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2764   B->preallocated = PETSC_TRUE;
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2769 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2773 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2774 {
2775   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2776   PetscErrorCode ierr;
2777   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2778   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2779   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2780 
2781   PetscFunctionBegin;
2782   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2783   ii[0] = 0;
2784   for (i=0; i<M; i++) {
2785     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2786     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2787     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2788     /* remove one from the count if the matrix row has a diagonal entry */
2789     for (j=id[i]; j<id[i+1]; j++) {
2790       if (jd[j] == i) {ii[i+1]--;break;}
2791     }
2792   }
2793   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2794   cnt  = 0;
2795   for (i=0; i<M; i++) {
2796     for (j=io[i]; j<io[i+1]; j++) {
2797       if (garray[jo[j]] > rstart) break;
2798       jj[cnt++] = garray[jo[j]];
2799     }
2800     for (k=id[i]; k<id[i+1]; k++) {
2801       if (jd[k] != i) {
2802         jj[cnt++] = rstart + jd[k];
2803       }
2804     }
2805     for (; j<io[i+1]; j++) {
2806       jj[cnt++] = garray[jo[j]];
2807     }
2808   }
2809   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2814 
2815 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2819 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2820 {
2821   PetscErrorCode ierr;
2822   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2823   Mat            B;
2824   Mat_MPIAIJ     *b;
2825 
2826   PetscFunctionBegin;
2827   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2828 
2829   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2830   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2831   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2832   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2833   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2834   b    = (Mat_MPIAIJ*) B->data;
2835 
2836   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2837   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2838   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2839   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2840   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2841   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2843   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845   if (reuse == MAT_REUSE_MATRIX) {
2846     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2847   } else {
2848    *newmat = B;
2849   }
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 #if defined(PETSC_HAVE_MUMPS)
2854 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2855 #endif
2856 
2857 /*MC
2858    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2859 
2860    Options Database Keys:
2861 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2862 . -mat_block_size <bs> - set the blocksize used to store the matrix
2863 - -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory when constructing the matrix
2864 
2865   Level: beginner
2866 
2867 .seealso: MatCreateBAIJ()
2868 M*/
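/*
   Editorial sketch (not part of the PETSc source): how the options above are
   typically exercised, assuming a hypothetical code that creates its matrix with
   MatSetFromOptions().  The global sizes M and N are assumed divisible by the block size.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,4,5,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatSeqBAIJSetPreallocation(A,4,5,NULL);CHKERRQ(ierr);

   Running the program with -mat_type mpibaij -mat_block_size 4 selects this type;
   only the preallocation call matching the runtime type takes effect, the other is ignored.
*/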
2869 
2870 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatCreate_MPIBAIJ"
2874 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2875 {
2876   Mat_MPIBAIJ    *b;
2877   PetscErrorCode ierr;
2878   PetscBool      flg;
2879 
2880   PetscFunctionBegin;
2881   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2882   B->data = (void*)b;
2883 
2884   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2885   B->assembled = PETSC_FALSE;
2886 
2887   B->insertmode = NOT_SET_VALUES;
2888   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2889   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2890 
2891   /* build local table of row and column ownerships */
2892   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2893 
2894   /* build cache for off array entries formed */
2895   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2896 
2897   b->donotstash  = PETSC_FALSE;
2898   b->colmap      = NULL;
2899   b->garray      = NULL;
2900   b->roworiented = PETSC_TRUE;
2901 
2902   /* stuff used in block assembly */
2903   b->barray = 0;
2904 
2905   /* stuff used for matrix vector multiply */
2906   b->lvec  = 0;
2907   b->Mvctx = 0;
2908 
2909   /* stuff for MatGetRow() */
2910   b->rowindices   = 0;
2911   b->rowvalues    = 0;
2912   b->getrowactive = PETSC_FALSE;
2913 
2914   /* hash table stuff */
2915   b->ht           = 0;
2916   b->hd           = 0;
2917   b->ht_size      = 0;
2918   b->ht_flag      = PETSC_FALSE;
2919   b->ht_fact      = 0;
2920   b->ht_total_ct  = 0;
2921   b->ht_insert_ct = 0;
2922 
2923   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2924   b->ijonly = PETSC_FALSE;
2925 
2926   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2927   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2928   if (flg) {
2929     PetscReal fact = 1.39;
2930     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2931     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2932     if (fact <= 1.0) fact = 1.39;
2933     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2934     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2935   }
2936   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2937 
2938 #if defined(PETSC_HAVE_MUMPS)
2939   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
2940 #endif
2941   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2942   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2943   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2944   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2945   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2946   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2947   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2948   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2949   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2950   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2951   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
2952   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 /*MC
2957    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2958 
2959    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2960    and MATMPIBAIJ otherwise.
2961 
2962    Options Database Keys:
2963 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2964 
2965   Level: beginner
2966 
2967 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2968 M*/
2969 
2970 #undef __FUNCT__
2971 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2972 /*@C
2973    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2974    (block compressed row).  For good matrix assembly performance
2975    the user should preallocate the matrix storage by setting the parameters
2976    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2977    performance can be increased by more than a factor of 50.
2978 
2979    Collective on Mat
2980 
2981    Input Parameters:
2982 +  A - the matrix
2983 .  bs   - size of block
2984 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2985            submatrix  (same for all local rows)
2986 .  d_nnz - array containing the number of block nonzeros in the various block rows
2987            in the diagonal portion of the local submatrix (possibly different for each block
2988            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2989            set it even if it is zero.
2990 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2991            submatrix (same for all local rows).
2992 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2993            off-diagonal portion of the local submatrix (possibly different for
2994            each block row) or NULL.
2995 
2996    If the *_nnz parameter is given then the *_nz parameter is ignored
2997 
2998    Options Database Keys:
2999 +   -mat_block_size - size of the blocks to use
3000 -   -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory when constructing the matrix
3001 
3002    Notes:
3003    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3004    then it must be used on all processors that share the object for that argument.
3005 
3006    Storage Information:
3007    For a square global matrix we define each processor's diagonal portion
3008    to be its local rows and the corresponding columns (a square submatrix);
3009    each processor's off-diagonal portion encompasses the remainder of the
3010    local matrix (a rectangular submatrix).
3011 
3012    The user can specify preallocated storage for the diagonal part of
3013    the local submatrix with either d_nz or d_nnz (not both).  Set
3014    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3015    memory allocation.  Likewise, specify preallocated storage for the
3016    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3017 
3018    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3019    the figure below we depict these three local rows and all columns (0-11).
3020 
3021 .vb
3022            0 1 2 3 4 5 6 7 8 9 10 11
3023           --------------------------
3024    row 3  |o o o d d d o o o o  o  o
3025    row 4  |o o o d d d o o o o  o  o
3026    row 5  |o o o d d d o o o o  o  o
3027           --------------------------
3028 .ve
3029 
3030    Thus, any entries in the d locations are stored in the d (diagonal)
3031    submatrix, and any entries in the o locations are stored in the
3032    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3033    stored simply in the MATSEQBAIJ format for compressed row storage.
3034 
3035    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3036    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3037    In general, for PDE problems in which most nonzeros are near the diagonal,
3038    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3039    or you will get TERRIBLE performance; see the users' manual chapter on
3040    matrices.
3041 
3042    You can call MatGetInfo() to get information on how effective the preallocation was;
3043    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3044    You can also run with the option -info and look for messages with the string
3045    malloc in them to see if additional memory allocation was needed.
3046 
3047    Level: intermediate
3048 
3049 .keywords: matrix, block, aij, compressed row, sparse, parallel
3050 
3051 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3052 @*/
3053 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3054 {
3055   PetscErrorCode ierr;
3056 
3057   PetscFunctionBegin;
3058   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3059   PetscValidType(B,1);
3060   PetscValidLogicalCollectiveInt(B,bs,2);
3061   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
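/*
   Editorial sketch (not part of the PETSc source): preallocating the three local
   rows depicted in the figure above with bs = 1, on some hypothetical matrix A.
   Each of those rows has 3 entries in the diagonal portion (columns 3-5) and 9
   entries in the off-diagonal portion.

     PetscInt d_nnz[3] = {3,3,3},o_nnz[3] = {9,9,9};
     ierr = MatMPIBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   Since every row has the same counts, the same effect is obtained with

     ierr = MatMPIBAIJSetPreallocation(A,1,3,NULL,9,NULL);CHKERRQ(ierr);
*/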
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatCreateBAIJ"
3067 /*@C
3068    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3069    (block compressed row).  For good matrix assembly performance
3070    the user should preallocate the matrix storage by setting the parameters
3071    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3072    performance can be increased by more than a factor of 50.
3073 
3074    Collective on MPI_Comm
3075 
3076    Input Parameters:
3077 +  comm - MPI communicator
3078 .  bs   - size of block
3079 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3080            This value should be the same as the local size used in creating the
3081            y vector for the matrix-vector product y = Ax.
3082 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3083            This value should be the same as the local size used in creating the
3084            x vector for the matrix-vector product y = Ax.
3085 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3086 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3087 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3088            submatrix  (same for all local rows)
3089 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3090            in the diagonal portion of the local submatrix (possibly different for each block
3091            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3092            and set it even if it is zero.
3093 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3094            submatrix (same for all local rows).
3095 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3096            off-diagonal portion of the local submatrix (possibly different for
3097            each block row) or NULL.
3098 
3099    Output Parameter:
3100 .  A - the matrix
3101 
3102    Options Database Keys:
3103 +   -mat_block_size - size of the blocks to use
3104 -   -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory when constructing the matrix
3105 
3106    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3107    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3108    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3109 
3110    Notes:
3111    If the *_nnz parameter is given then the *_nz parameter is ignored
3112 
3113    A nonzero block is any block that has 1 or more nonzeros in it
3114 
3115    The user MUST specify either the local or global matrix dimensions
3116    (possibly both).
3117 
3118    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3119    then it must be used on all processors that share the object for that argument.
3120 
3121    Storage Information:
3122    For a square global matrix we define each processor's diagonal portion
3123    to be its local rows and the corresponding columns (a square submatrix);
3124    each processor's off-diagonal portion encompasses the remainder of the
3125    local matrix (a rectangular submatrix).
3126 
3127    The user can specify preallocated storage for the diagonal part of
3128    the local submatrix with either d_nz or d_nnz (not both).  Set
3129    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3130    memory allocation.  Likewise, specify preallocated storage for the
3131    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3132 
3133    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3134    the figure below we depict these three local rows and all columns (0-11).
3135 
3136 .vb
3137            0 1 2 3 4 5 6 7 8 9 10 11
3138           --------------------------
3139    row 3  |o o o d d d o o o o  o  o
3140    row 4  |o o o d d d o o o o  o  o
3141    row 5  |o o o d d d o o o o  o  o
3142           --------------------------
3143 .ve
3144 
3145    Thus, any entries in the d locations are stored in the d (diagonal)
3146    submatrix, and any entries in the o locations are stored in the
3147    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3148    stored simply in the MATSEQBAIJ format for compressed row storage.
3149 
3150    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3151    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3152    In general, for PDE problems in which most nonzeros are near the diagonal,
3153    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3154    or you will get TERRIBLE performance; see the users' manual chapter on
3155    matrices.
3156 
3157    Level: intermediate
3158 
3159 .keywords: matrix, block, aij, compressed row, sparse, parallel
3160 
3161 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3162 @*/
3163 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3164 {
3165   PetscErrorCode ierr;
3166   PetscMPIInt    size;
3167 
3168   PetscFunctionBegin;
3169   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3170   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3172   if (size > 1) {
3173     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3174     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3175   } else {
3176     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3177     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3178   }
3179   PetscFunctionReturn(0);
3180 }
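/*
   Editorial sketch (not part of the PETSc source): creating a hypothetical 100x100
   matrix with 2x2 blocks, allowing roughly 5 diagonal blocks and 2 off-diagonal
   blocks per block row.

     Mat A;
     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);

   As noted above, the MatCreate(), MatSetType()/MatSetFromOptions(),
   MatMPIBAIJSetPreallocation() paradigm is recommended instead of calling this
   routine directly.
*/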
3181 
3182 #undef __FUNCT__
3183 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3184 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3185 {
3186   Mat            mat;
3187   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3188   PetscErrorCode ierr;
3189   PetscInt       len=0;
3190 
3191   PetscFunctionBegin;
3192   *newmat = 0;
3193   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3194   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3195   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3196   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3197 
3198   mat->factortype   = matin->factortype;
3199   mat->preallocated = PETSC_TRUE;
3200   mat->assembled    = PETSC_TRUE;
3201   mat->insertmode   = NOT_SET_VALUES;
3202 
3203   a             = (Mat_MPIBAIJ*)mat->data;
3204   mat->rmap->bs = matin->rmap->bs;
3205   a->bs2        = oldmat->bs2;
3206   a->mbs        = oldmat->mbs;
3207   a->nbs        = oldmat->nbs;
3208   a->Mbs        = oldmat->Mbs;
3209   a->Nbs        = oldmat->Nbs;
3210 
3211   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3212   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3213 
3214   a->size         = oldmat->size;
3215   a->rank         = oldmat->rank;
3216   a->donotstash   = oldmat->donotstash;
3217   a->roworiented  = oldmat->roworiented;
3218   a->rowindices   = 0;
3219   a->rowvalues    = 0;
3220   a->getrowactive = PETSC_FALSE;
3221   a->barray       = 0;
3222   a->rstartbs     = oldmat->rstartbs;
3223   a->rendbs       = oldmat->rendbs;
3224   a->cstartbs     = oldmat->cstartbs;
3225   a->cendbs       = oldmat->cendbs;
3226 
3227   /* hash table stuff */
3228   a->ht           = 0;
3229   a->hd           = 0;
3230   a->ht_size      = 0;
3231   a->ht_flag      = oldmat->ht_flag;
3232   a->ht_fact      = oldmat->ht_fact;
3233   a->ht_total_ct  = 0;
3234   a->ht_insert_ct = 0;
3235 
3236   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3237   if (oldmat->colmap) {
3238 #if defined(PETSC_USE_CTABLE)
3239     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3240 #else
3241     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3242     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3243     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3244 #endif
3245   } else a->colmap = 0;
3246 
3247   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3248     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3249     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3250     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3251   } else a->garray = 0;
3252 
3253   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3254   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3255   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3256   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3257   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3258 
3259   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3260   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3261   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3262   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3263   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3264   *newmat = mat;
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatLoad_MPIBAIJ"
3270 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3271 {
3272   PetscErrorCode ierr;
3273   int            fd;
3274   PetscInt       i,nz,j,rstart,rend;
3275   PetscScalar    *vals,*buf;
3276   MPI_Comm       comm;
3277   MPI_Status     status;
3278   PetscMPIInt    rank,size,maxnz;
3279   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3280   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3281   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3282   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3283   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3284   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3288   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3289   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3290   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3291 
3292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3293   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3294   if (!rank) {
3295     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3296     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3297     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298   }
3299 
3300   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3301 
3302   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3303   M    = header[1]; N = header[2];
3304 
3305   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3306   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3307   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3308 
3309   /* If global sizes are set, check if they are consistent with that given in the file */
3310   if (sizesset) {
3311     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3312   }
3313   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3314   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3315 
3316   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3317 
3318   /*
3319      This code adds extra rows to make sure the number of rows is
3320      divisible by the blocksize
3321   */
3322   Mbs        = M/bs;
3323   extra_rows = bs - M + bs*Mbs;
3324   if (extra_rows == bs) extra_rows = 0;
3325   else                  Mbs++;
3326   if (extra_rows && !rank) {
3327     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3328   }
3329 
3330   /* determine ownership of all rows */
3331   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3332     mbs = Mbs/size + ((Mbs % size) > rank);
3333     m   = mbs*bs;
3334   } else { /* User set */
3335     m   = newmat->rmap->n;
3336     mbs = m/bs;
3337   }
3338   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3339   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3340 
3341   /* process 0 needs enough room for process with most rows */
3342   if (!rank) {
3343     mmax = rowners[1];
3344     for (i=2; i<=size; i++) {
3345       mmax = PetscMax(mmax,rowners[i]);
3346     }
3347     mmax*=bs;
3348   } else mmax = -1;             /* unused, but compiler warns anyway */
3349 
3350   rowners[0] = 0;
3351   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3352   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3353   rstart = rowners[rank];
3354   rend   = rowners[rank+1];
3355 
3356   /* distribute row lengths to all processors */
3357   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3358   if (!rank) {
3359     mend = m;
3360     if (size == 1) mend = mend - extra_rows;
3361     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3362     for (j=mend; j<m; j++) locrowlens[j] = 1;
3363     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3364     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3365     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3366     for (j=0; j<m; j++) {
3367       procsnz[0] += locrowlens[j];
3368     }
3369     for (i=1; i<size; i++) {
3370       mend = browners[i+1] - browners[i];
3371       if (i == size-1) mend = mend - extra_rows;
3372       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3373       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3374       /* calculate the number of nonzeros on each processor */
3375       for (j=0; j<browners[i+1]-browners[i]; j++) {
3376         procsnz[i] += rowlengths[j];
3377       }
3378       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3379     }
3380     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3381   } else {
3382     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3383   }
3384 
3385   if (!rank) {
3386     /* determine max buffer needed and allocate it */
3387     maxnz = procsnz[0];
3388     for (i=1; i<size; i++) {
3389       maxnz = PetscMax(maxnz,procsnz[i]);
3390     }
3391     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3392 
3393     /* read in my part of the matrix column indices  */
3394     nz     = procsnz[0];
3395     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3396     mycols = ibuf;
3397     if (size == 1) nz -= extra_rows;
3398     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3399     if (size == 1) {
3400       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3401     }
3402 
3403     /* read in every other processor's part (except the last one) and ship it off */
3404     for (i=1; i<size-1; i++) {
3405       nz   = procsnz[i];
3406       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3407       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3408     }
3409     /* read in the stuff for the last proc */
3410     if (size != 1) {
3411       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3412       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3413       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3414       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3415     }
3416     ierr = PetscFree(cols);CHKERRQ(ierr);
3417   } else {
3418     /* determine buffer space needed for message */
3419     nz = 0;
3420     for (i=0; i<m; i++) {
3421       nz += locrowlens[i];
3422     }
3423     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3424     mycols = ibuf;
3425     /* receive message of column indices*/
3426     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3427     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3428     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3429   }
3430 
3431   /* loop over local rows, determining number of off diagonal entries */
3432   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3433   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3434   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3435   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3436   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3437   rowcount = 0; nzcount = 0;
3438   for (i=0; i<mbs; i++) {
3439     dcount  = 0;
3440     odcount = 0;
3441     for (j=0; j<bs; j++) {
3442       kmax = locrowlens[rowcount];
3443       for (k=0; k<kmax; k++) {
3444         tmp = mycols[nzcount++]/bs;
3445         if (!mask[tmp]) {
3446           mask[tmp] = 1;
3447           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3448           else masked1[dcount++] = tmp;
3449         }
3450       }
3451       rowcount++;
3452     }
3453 
3454     dlens[i]  = dcount;
3455     odlens[i] = odcount;
3456 
3457     /* zero out the mask elements we set */
3458     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3459     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3460   }
3461 
3462 
3463   if (!sizesset) {
3464     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3465   }
3466   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3467 
3468   if (!rank) {
3469     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3470     /* read in my part of the matrix numerical values  */
3471     nz     = procsnz[0];
3472     vals   = buf;
3473     mycols = ibuf;
3474     if (size == 1) nz -= extra_rows;
3475     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3476     if (size == 1) {
3477       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3478     }
3479 
3480     /* insert into matrix */
3481     jj = rstart*bs;
3482     for (i=0; i<m; i++) {
3483       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3484       mycols += locrowlens[i];
3485       vals   += locrowlens[i];
3486       jj++;
3487     }
3488     /* read in other processors (except the last one) and ship out */
3489     for (i=1; i<size-1; i++) {
3490       nz   = procsnz[i];
3491       vals = buf;
3492       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3493       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3494     }
3495     /* the last proc */
3496     if (size != 1) {
3497       nz   = procsnz[i] - extra_rows;
3498       vals = buf;
3499       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3500       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3501       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3502     }
3503     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3504   } else {
3505     /* receive numeric values */
3506     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3507 
3508     /* receive message of values*/
3509     vals   = buf;
3510     mycols = ibuf;
3511     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3512 
3513     /* insert into matrix */
3514     jj = rstart*bs;
3515     for (i=0; i<m; i++) {
3516       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3517       mycols += locrowlens[i];
3518       vals   += locrowlens[i];
3519       jj++;
3520     }
3521   }
3522   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3523   ierr = PetscFree(buf);CHKERRQ(ierr);
3524   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3525   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3526   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3527   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3528   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3529   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3535 /*@
3536    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3537 
3538    Not Collective, each process can use a different factor
3539 
3540    Input Parameters:
3541 +  mat  - the matrix
3542 -  fact - factor
3543 
3544    Level: advanced
3545 
3546   Notes:
3547    This can also be set by the command line option: -mat_use_hash_table <fact>
3548 
3549 .keywords: matrix, hashtable, factor, HT
3550 
3551 .seealso: MatSetOption()
3552 @*/
3553 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3554 {
3555   PetscErrorCode ierr;
3556 
3557   PetscFunctionBegin;
3558   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3559   PetscFunctionReturn(0);
3560 }
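/*
   Editorial sketch (not part of the PETSc source): enabling the hash table and
   setting its size factor programmatically on a hypothetical matrix A, instead of
   using the -mat_use_hash_table <fact> option.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/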
3561 
3562 #undef __FUNCT__
3563 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3564 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3565 {
3566   Mat_MPIBAIJ *baij;
3567 
3568   PetscFunctionBegin;
3569   baij          = (Mat_MPIBAIJ*)mat->data;
3570   baij->ht_fact = fact;
3571   PetscFunctionReturn(0);
3572 }
3573 
3574 #undef __FUNCT__
3575 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3576 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3577 {
3578   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3579 
3580   PetscFunctionBegin;
3581   *Ad     = a->A;
3582   *Ao     = a->B;
3583   *colmap = a->garray;
3584   PetscFunctionReturn(0);
3585 }
3586 
3587 /*
3588     Special version for direct calls from Fortran (to eliminate two function call overheads)
3589 */
3590 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3591 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3592 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3593 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3594 #endif
3595 
3596 #undef __FUNCT__
3597 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3598 /*@C
3599   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3600 
3601   Collective on Mat
3602 
3603   Input Parameters:
3604 + mat - the matrix
3605 . min - number of input rows
3606 . im - input rows
3607 . nin - number of input columns
3608 . in - input columns
3609 . v - numerical values input
3610 - addvin - INSERT_VALUES or ADD_VALUES
3611 
3612   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3613 
3614   Level: advanced
3615 
3616 .seealso:   MatSetValuesBlocked()
3617 @*/
3618 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3619 {
3620   /* convert input arguments to C version */
3621   Mat        mat  = *matin;
3622   PetscInt   m    = *min, n = *nin;
3623   InsertMode addv = *addvin;
3624 
3625   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3626   const MatScalar *value;
3627   MatScalar       *barray     = baij->barray;
3628   PetscBool       roworiented = baij->roworiented;
3629   PetscErrorCode  ierr;
3630   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3631   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3632   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3633 
3634   PetscFunctionBegin;
3635   /* tasks normally handled by MatSetValuesBlocked() */
3636   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3637 #if defined(PETSC_USE_DEBUG)
3638   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3639   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3640 #endif
3641   if (mat->assembled) {
3642     mat->was_assembled = PETSC_TRUE;
3643     mat->assembled     = PETSC_FALSE;
3644   }
3645   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3646 
3647 
3648   if (!barray) {
3649     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3650     baij->barray = barray;
3651   }
3652 
3653   if (roworiented) stepval = (n-1)*bs;
3654   else stepval = (m-1)*bs;
3655 
3656   for (i=0; i<m; i++) {
3657     if (im[i] < 0) continue;
3658 #if defined(PETSC_USE_DEBUG)
3659     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3660 #endif
3661     if (im[i] >= rstart && im[i] < rend) {
3662       row = im[i] - rstart;
3663       for (j=0; j<n; j++) {
3664         /* If NumCol = 1 then a copy is not required */
3665         if ((roworiented) && (n == 1)) {
3666           barray = (MatScalar*)v + i*bs2;
3667         } else if ((!roworiented) && (m == 1)) {
3668           barray = (MatScalar*)v + j*bs2;
3669         } else { /* Here a copy is required */
3670           if (roworiented) {
3671             value = v + i*(stepval+bs)*bs + j*bs;
3672           } else {
3673             value = v + j*(stepval+bs)*bs + i*bs;
3674           }
3675           for (ii=0; ii<bs; ii++,value+=stepval) {
3676             for (jj=0; jj<bs; jj++) {
3677               *barray++ = *value++;
3678             }
3679           }
3680           barray -=bs2;
3681         }
3682 
3683         if (in[j] >= cstart && in[j] < cend) {
3684           col  = in[j] - cstart;
3685           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3686         } else if (in[j] < 0) continue;
3687 #if defined(PETSC_USE_DEBUG)
3688         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3689 #endif
3690         else {
3691           if (mat->was_assembled) {
3692             if (!baij->colmap) {
3693               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3694             }
3695 
3696 #if defined(PETSC_USE_DEBUG)
3697 #if defined(PETSC_USE_CTABLE)
3698             { PetscInt data;
3699               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3700               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3701             }
3702 #else
3703             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3704 #endif
3705 #endif
3706 #if defined(PETSC_USE_CTABLE)
3707             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3708             col  = (col - 1)/bs;
3709 #else
3710             col = (baij->colmap[in[j]] - 1)/bs;
3711 #endif
3712             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3713               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3714               col  =  in[j];
3715             }
3716           } else col = in[j];
3717           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3718         }
3719       }
3720     } else {
3721       if (!baij->donotstash) {
3722         if (roworiented) {
3723           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3724         } else {
3725           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3726         }
3727       }
3728     }
3729   }
3730 
3731   /* task normally handled by MatSetValuesBlocked() */
3732   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3733   PetscFunctionReturn(0);
3734 }
3735 
3736 #undef __FUNCT__
3737 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3738 /*@
3739      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3740          CSR format the local rows.
3741 
3742    Collective on MPI_Comm
3743 
3744    Input Parameters:
3745 +  comm - MPI communicator
3746 .  bs - the block size, only a block size of 1 is supported
3747 .  m - number of local rows (Cannot be PETSC_DECIDE)
3748 .  n - number of local columns; this value should be the same as the local size used in creating the
3749        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3750        calculated if N is given).  For square matrices n is almost always m.
3751 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3752 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3753 .   i - row indices
3754 .   j - column indices
3755 -   a - matrix values
3756 
3757    Output Parameter:
3758 .   mat - the matrix
3759 
3760    Level: intermediate
3761 
3762    Notes:
3763        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3764      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3765      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3766 
3767      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3768      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3769      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3770      with column-major ordering within blocks.
3771 
3772        The i and j indices are 0 based, and the entries of i are offsets into the local j array (standard CSR row pointers).
3773 
3774 .keywords: matrix, aij, compressed row, sparse, parallel
3775 
3776 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3777           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3778 @*/
3779 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3780 {
3781   PetscErrorCode ierr;
3782 
3783   PetscFunctionBegin;
3784   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3785   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3786   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3787   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3788   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3789   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3790   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3791   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3792   PetscFunctionReturn(0);
3793 }
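/*
   Editorial sketch (not part of the PETSc source): a hypothetical single-process
   call with bs = 1 (the only supported block size according to the note above),
   two local rows and three local columns in CSR form.

     PetscInt    ii[] = {0,2,4};
     PetscInt    jj[] = {0,1,1,2};
     PetscScalar aa[] = {1.0,2.0,3.0,4.0};
     Mat         A;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);

   Here ii holds the row offsets into jj (so row 0 has columns 0 and 1, row 1 has
   columns 1 and 2), and since the arrays are copied they may be freed afterwards.
*/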
3794